use of io.cdap.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
the class BaseRDDCollection method createMultiStoreTask.
/**
 * Creates a task that, when run, writes this collection to several sinks at once.
 *
 * <p>No work happens when this method is called; everything is deferred until the returned
 * {@link Runnable} is executed. At that point the underlying RDD is passed through a
 * {@link MultiSinkFunction} via {@code flatMapToPair}, the resulting pair RDD is handed to
 * {@code sinkFactory.writeCombinedRDD}, and lineage is recorded for every output name that
 * call returns.
 *
 * @param phaseSpec specification of the phase, forwarded to the {@link MultiSinkFunction}
 * @param group stage names forwarded to the {@link MultiSinkFunction}
 * @param sinks names of the sinks handed to {@code writeCombinedRDD}
 * @param collectors statistics collectors forwarded to the {@link MultiSinkFunction}
 * @return the deferred write task
 */
@Override
public Runnable createMultiStoreTask(PhaseSpec phaseSpec, Set<String> group, Set<String> sinks, Map<String, StageStatisticsCollector> collectors) {
  return () -> {
    // The cast mirrors the original code: MultiSinkFunction is adapted to the element type of this collection.
    PairFlatMapFunction<T, String, KeyValue<Object, Object>> sinkFunction =
      (PairFlatMapFunction<T, String, KeyValue<Object, Object>>) new MultiSinkFunction(sec, phaseSpec, group, collectors);
    JavaPairRDD<String, KeyValue<Object, Object>> tagged = rdd.flatMapToPair(sinkFunction);
    for (String writtenOutput : sinkFactory.writeCombinedRDD(tagged, sec, sinks)) {
      recordLineage(writtenOutput);
    }
  };
}
use of io.cdap.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
the class TestFrameworkTestRun method assertWorkerDatasetWrites.
/**
 * Checks the entries written to the worker instances dataset within a row range.
 *
 * <p>The scan result is materialized first; the entry count is asserted before any individual
 * value is inspected.
 *
 * @param startRow start row passed to the table scan
 * @param endRow end row passed to the table scan
 * @param expectedCount number of entries the scan must return
 * @param expectedTotalCount int value that every scanned entry must carry
 * @throws Exception if the dataset cannot be obtained or scanned
 */
private void assertWorkerDatasetWrites(byte[] startRow, byte[] endRow, int expectedCount, int expectedTotalCount) throws Exception {
  DataSetManager<KeyValueTable> tableManager = getDataset(testSpace.dataset(AppUsingGetServiceURL.WORKER_INSTANCES_DATASET));
  KeyValueTable table = tableManager.get();
  try (CloseableIterator<KeyValue<byte[], byte[]>> rows = table.scan(startRow, endRow)) {
    List<KeyValue<byte[], byte[]>> entries = Lists.newArrayList(rows);
    // The worker must have started with exactly expectedCount instances.
    Assert.assertEquals(expectedCount, entries.size());
    // Every instance must have recorded the same total number of instances.
    entries.forEach(entry -> Assert.assertEquals(expectedTotalCount, Bytes.toInt(entry.getValue())));
  }
}
use of io.cdap.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
the class AuthorizationTest method assertDatasetIsEmpty.
/**
 * Asserts that a full scan of the named key-value table yields no entries.
 *
 * @param namespaceId namespace that contains the dataset
 * @param datasetName name of the dataset to check
 * @throws Exception if the dataset cannot be obtained or scanned
 */
private void assertDatasetIsEmpty(NamespaceId namespaceId, String datasetName) throws Exception {
  DataSetManager<KeyValueTable> tableManager = getDataset(namespaceId.dataset(datasetName));
  KeyValueTable table = tableManager.get();
  // Unbounded scan: both the start and the end row are null.
  try (CloseableIterator<KeyValue<byte[], byte[]>> rows = table.scan(null, null)) {
    boolean hasAnyRow = rows.hasNext();
    Assert.assertFalse(hasAnyRow);
  }
}
use of io.cdap.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
the class SparkTest method testTransaction.
/**
 * Runs {@code TransactionSpark} against a small word file and checks the threshold result
 * dataset while the program is still running.
 *
 * <p>The test deploys {@code TestSparkApp}, writes five two-word lines to a temporary file,
 * starts the Spark program with that file and the dataset names as runtime arguments, and then
 * waits until a scan of {@code SparkThresholdResult} yields exactly the words
 * {@code brown}, {@code fox} and {@code bear}. Finally the program is stopped and the test waits
 * for the run to reach the {@code KILLED} status.
 *
 * @throws Exception if deployment, file I/O, the wait, or stopping the program fails
 */
@Test
public void testTransaction() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  // Prepare the local input file consumed by the Spark program
  File wordsFile = TEMP_FOLDER.newFile();
  String[] inputLines = {"red fox", "brown fox", "grey fox", "brown bear", "black bear"};
  try (PrintWriter out = new PrintWriter(Files.newBufferedWriter(wordsFile.toPath(), StandardCharsets.UTF_8))) {
    for (String line : inputLines) {
      out.println(line);
    }
  }

  // Launch the Spark program
  SparkManager sparkManager = appManager.getSparkManager(TransactionSpark.class.getSimpleName());
  Map<String, String> runtimeArgs = ImmutableMap.of(
    "input.file", wordsFile.getAbsolutePath(),
    "keyvalue.table", "KeyValueTable",
    "result.all.dataset", "SparkResult",
    "result.threshold", "2",
    "result.threshold.dataset", "SparkThresholdResult");
  sparkManager.start(runtimeArgs);

  // The result is checked before the Spark program terminates
  DataSetManager<KeyValueTable> thresholdManager = getDataset("SparkThresholdResult");
  KeyValueTable thresholdTable = thresholdManager.get();

  // With a threshold of >= 2 the threshold dataset is expected to contain [brown, fox, bear]
  ImmutableSet<String> expectedWords = ImmutableSet.of("brown", "fox", "bear");
  Tasks.waitFor(expectedWords, () -> {
    // Flushing makes the following scan run in a new TX
    thresholdManager.flush();
    LOG.info("Reading from threshold result");
    try (CloseableIterator<KeyValue<byte[], byte[]>> rows = thresholdTable.scan(null, null)) {
      ImmutableSet.Builder<String> seenWords = ImmutableSet.builder();
      while (rows.hasNext()) {
        KeyValue<byte[], byte[]> row = rows.next();
        String word = Bytes.toString(row.getKey());
        LOG.info("{}, {}", word, Bytes.toInt(row.getValue()));
        seenWords.add(word);
      }
      return seenWords.build();
    }
  }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);

  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
use of io.cdap.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
the class SparkTest method validateGetDatasetOutput.
/**
 * Verifies that the given table holds exactly the expected log statistics.
 *
 * <p>Keys and values of the table are JSON strings that are deserialized into
 * {@code SparkAppUsingGetDataset.LogKey} and {@code SparkAppUsingGetDataset.LogStats}. The scan
 * must yield one record per expected entry, every key must be one of the expected keys, its
 * statistics must match, and no further records may follow.
 *
 * @param logStatsTable table to scan and validate
 */
private void validateGetDatasetOutput(KeyValueTable logStatsTable) {
  SparkAppUsingGetDataset.LogKey fredKey1 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey fredKey2 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogKey bradKey1 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey bradKey2 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogStats fredStats1 = new SparkAppUsingGetDataset.LogStats(2, 100);
  SparkAppUsingGetDataset.LogStats fredStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats1 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  Map<SparkAppUsingGetDataset.LogKey, SparkAppUsingGetDataset.LogStats> expected = ImmutableMap.of(fredKey1, fredStats1, fredKey2, fredStats2, bradKey1, bradStats1, bradKey2, bradStats2);
  // One Gson instance is enough for every record; there is no need to build two per iteration.
  Gson gson = new Gson();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    // must have one record per expected entry (4 records)
    for (int i = 0; i < expected.size(); i++) {
      Assert.assertTrue("Expected next for i = " + i, scan.hasNext());
      KeyValue<byte[], byte[]> next = scan.next();
      SparkAppUsingGetDataset.LogKey logKey = gson.fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
      SparkAppUsingGetDataset.LogStats logStats = gson.fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
      // Without this check an unexpected key paired with a null value would compare null to null and pass.
      SparkAppUsingGetDataset.LogStats expectedStats = expected.get(logKey);
      Assert.assertNotNull("Unexpected log key for i = " + i, expectedStats);
      Assert.assertEquals(expectedStats, logStats);
    }
    // no more records
    Assert.assertFalse(scan.hasNext());
  }
}
Aggregations