Search in sources :

Example 31 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class TestFrameworkTestRun method testCrossNSMapperDatasetAccess.

/**
 * Runs the MapReduce program of {@link DatasetCrossNSAccessWithMAPApp} with its input dataset in one
 * namespace and its output dataset in another, then checks the output table via
 * {@code verifyMapperJobOutput}.
 *
 * @throws Exception propagated from namespace/dataset setup, deployment, or the program run
 */
@Category(SlowTests.class)
@Test
public void testCrossNSMapperDatasetAccess() throws Exception {
    // Two separate namespaces: one holding the input table, one holding the output table.
    NamespaceMeta sourceNs = new NamespaceMeta.Builder().setName("inputNS").build();
    NamespaceMeta sinkNs = new NamespaceMeta.Builder().setName("outputNS").build();
    getNamespaceAdmin().create(sourceNs);
    getNamespaceAdmin().create(sinkNs);

    addDatasetInstance(sourceNs.getNamespaceId().dataset("table1"), "keyValueTable");
    addDatasetInstance(sinkNs.getNamespaceId().dataset("table2"), "keyValueTable");

    // Seed the input table with a single entry and flush it so the program can see it.
    DataSetManager<KeyValueTable> sourceManager = getDataset(sourceNs.getNamespaceId().dataset("table1"));
    sourceManager.get().write("hello", "world");
    sourceManager.flush();

    ApplicationManager application = deployApplication(DatasetCrossNSAccessWithMAPApp.class);

    // Runtime arguments tell the program which namespace/dataset to read from and write to.
    Map<String, String> runtimeArgs = ImmutableMap.of(
        DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS, sourceNs.getName(),
        DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME, "table1",
        DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS, sinkNs.getName(),
        DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME, "table2");
    MapReduceManager mapReduce = application
        .getMapReduceManager(DatasetCrossNSAccessWithMAPApp.MAPREDUCE_PROGRAM)
        .start(runtimeArgs);
    mapReduce.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    application.stopAll();

    DataSetManager<KeyValueTable> sinkManager = getDataset(sinkNs.getNamespaceId().dataset("table2"));
    verifyMapperJobOutput(DatasetCrossNSAccessWithMAPApp.class, sinkManager);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) MapReduceManager(co.cask.cdap.test.MapReduceManager) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 32 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class DataStreamsSparkSinkTest method testSparkSink.

/**
 * Starts the Spark program named {@code DataStreamsSparkLauncher.NAME} with the given table name,
 * polls until the output dataset holds the expected key/value pairs, then stops the program and
 * waits for the run to be reported as {@code KILLED}.
 *
 * @param appManager manager of the application that contains the Spark program
 * @param output dataset name passed to the program as the {@code "tablename"} runtime argument
 * @throws Exception propagated from the manager calls and the polling helpers
 */
private void testSparkSink(ApplicationManager appManager, final String output) throws Exception {
    final Map<String, String> wanted = ImmutableMap.of("0", "samuel", "1", "jackson", "2", "dwayne", "3", "johnson");

    SparkManager launcher = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    Map<String, String> runtimeArgs = ImmutableMap.of("tablename", output);
    launcher.start(runtimeArgs);
    launcher.waitForStatus(true, 10, 1);

    // First wait until the output dataset can be obtained at all.
    Callable<Boolean> datasetAvailable = new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            return getDataset(output).get() != null;
        }
    };
    Tasks.waitFor(true, datasetAvailable, 1, TimeUnit.MINUTES);

    final DataSetManager<KeyValueTable> sinkManager = getDataset(output);

    // Then poll until the values stored under the expected keys match the expected values.
    Callable<Boolean> contentsMatch = new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            sinkManager.flush();
            Map<String, String> actual = co.cask.cdap.etl.mock.spark.streaming.MockSink.getValues(wanted.keySet(), sinkManager);
            return wanted.equals(actual);
        }
    };
    Tasks.waitFor(true, contentsMatch, 1, TimeUnit.MINUTES);

    launcher.stop();
    launcher.waitForStatus(false, 10, 1);
    launcher.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
}
Also used : SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 33 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class DynamicPartitionerWithAvroTest method runDynamicPartitionerMR.

/**
 * Runs the dynamic-partitioning MapReduce of {@link AppWithMapReduceUsingAvroDynamicPartitioner} over the
 * given records and, when the run is expected to succeed, verifies the emitted data notification and the
 * partitions written to the output {@link PartitionedFileSet}.
 *
 * @param records records written (as strings) to the input key-value table before the run
 * @param allowConcurrentWriters value passed to the program under the output dataset's
 *                               allow-concurrency runtime argument
 * @param precreatePartitions if true, {@code writeFile} is called for three partition keys before the run
 * @param partitionWriteOption if non-null, its name is passed as the {@code "partitionWriteOption"}
 *                             runtime argument
 * @param expectedStatus the value {@code runProgram} is expected to return; when false, output
 *                       verification is skipped
 * @throws Exception propagated from deployment, the transactional subroutines, or the program run
 */
private void runDynamicPartitionerMR(final List<? extends GenericRecord> records, boolean allowConcurrentWriters, final boolean precreatePartitions, @Nullable final DynamicPartitioner.PartitionWriteOption partitionWriteOption, boolean expectedStatus) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
    // 'now' is used both as the output partition time argument and when building expected partition keys
    final long now = System.currentTimeMillis();
    // expected grouping of the input records, compared against the actual partitions at the end
    final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);
    // write values to the input kvTable
    final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
    Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // the keys are not used; it matters that they're unique though
            for (int i = 0; i < records.size(); i++) {
                kvTable.write(Integer.toString(i), records.get(i).toString());
            }
        }
    });
    final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
    if (precreatePartitions) {
        // pre-populate three partitions so the run meets existing partitions
        // NOTE(review): the second createKey argument is presumably the "zip" field read back below - confirm
        Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws IOException {
                writeFile(pfs, createKey(now, 95111));
                writeFile(pfs, createKey(now, 98123));
                writeFile(pfs, createKey(now, 84125));
            }
        });
    }
    // runtime argument key scoped to the output dataset
    String allowConcurrencyKey = "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
    // run the partition writer m/r with this output partition time
    Map<String, String> arguments = new HashMap<>();
    arguments.put(OUTPUT_PARTITION_KEY, Long.toString(now));
    arguments.put(allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
    if (partitionWriteOption != null) {
        arguments.put("partitionWriteOption", partitionWriteOption.name());
    }
    // captured before the run so that only notifications from this run are fetched below
    long startTime = System.currentTimeMillis();
    boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class, new BasicArguments(arguments));
    Assert.assertEquals(expectedStatus, status);
    if (!expectedStatus) {
        // if we expect the program to fail, no need to check the output data for expected results
        return;
    }
    // Verify notifications: exactly one, and it must reference the output dataset in the default namespace
    List<Notification> notifications = getDataNotifications(startTime);
    Assert.assertEquals(1, notifications.size());
    Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET), DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));
    // this should have created a partition in the pfs
    final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws IOException {
            // collect all partitions by key while checking per-partition metadata and pre-existing files
            Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
            for (PartitionDetail partition : pfs.getPartitions(null)) {
                partitions.put(partition.getPartitionKey(), partition);
                // check that the mapreduce wrote the output partition metadata to all the output partitions
                Assert.assertEquals(getExpectedMetadata(precreatePartitions, partitionWriteOption), partition.getMetadata().asMap());
                // if files were precreated, and the option is to append, expect the empty file to exist
                // if partition write option is configured to overwrite, then the file is expected to not exist
                Location preexistingFile = partition.getLocation().append("file");
                if (precreatePartitions && partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) {
                    Assert.assertTrue(preexistingFile.exists());
                    try (InputStream inputStream = preexistingFile.getInputStream()) {
                        // read() returning -1 on the first call means the file has no content
                        Assert.assertEquals(-1, inputStream.read());
                    }
                } else {
                    Assert.assertFalse(preexistingFile.exists());
                }
            }
            // exactly three partitions are expected, with the same keys as the grouped input records
            Assert.assertEquals(3, partitions.size());
            Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());
            // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
            for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
                PartitionDetail partitionDetail = partitionKeyEntry.getValue();
                String relativePath = partitionDetail.getRelativePath();
                int zip = (int) partitionKeyEntry.getKey().getField("zip");
                // expected relative path is "<now><separator><zip>"
                Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
                Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
            }
            // finally compare the records read from each partition with the expected group (order-insensitive)
            for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
                Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
                Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
            }
        }
    });
}
Also used : HashSet(java.util.HashSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) HashMap(java.util.HashMap) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) Notification(co.cask.cdap.proto.Notification) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) GenericRecord(org.apache.avro.generic.GenericRecord) InputStream(java.io.InputStream) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) IOException(java.io.IOException) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Location(org.apache.twill.filesystem.Location)

Example 34 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testMapreduceWithObjectStore.

/**
 * Deploys {@link AppWithMapReduceUsingObjectStore} into a non-default namespace, writes two strings
 * to its input {@link ObjectStore}, runs the {@code ComputeCounts} program and verifies that the
 * output table maps each input string to its length. Also checks that requesting a dataset from a
 * non-existing namespace fails with {@link DatasetInstantiationException}.
 *
 * @throws Exception propagated from deployment, the transactional subroutines, or the program run
 */
@Test
public void testMapreduceWithObjectStore() throws Exception {
    // Deploy apps to another namespace and test cross-namespace access meanwhile
    final ApplicationWithPrograms app = deployApp(Id.Namespace.fromEntityId(new NamespaceId("someOtherNameSpace")), AppWithMapReduceUsingObjectStore.class);
    final ObjectStore<String> input = datasetCache.getDataset("someOtherNameSpace", "keys");
    // Get dataset from a non existing namespace
    try {
        datasetCache.getDataset("nonExistingNameSpace", "keys");
        Assert.fail("getDataset() should throw an exception when accessing dataset from a non-existing namespace.");
    } catch (DatasetInstantiationException ignored) {
        // expected: the namespace does not exist, so the dataset cannot be instantiated
    }
    final String testString = "persisted data";
    // Populate some input
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) input).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            input.write(Bytes.toBytes(testString), testString);
            input.write(Bytes.toBytes("distributed systems"), "distributed systems");
        }
    });
    runProgram(app, AppWithMapReduceUsingObjectStore.ComputeCounts.class, false, true);
    final KeyValueTable output = datasetCache.getDataset("someOtherNameSpace", "count");
    // read output and verify result
    Transactions.createTransactionExecutor(txExecutorFactory, output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // JUnit's contract is assertEquals(expected, actual); keeping that order makes
            // failure messages label the two values correctly.
            byte[] val = output.read(Bytes.toBytes(testString));
            Assert.assertNotNull(val);
            Assert.assertEquals(Integer.toString(testString.length()), Bytes.toString(val));
            val = output.read(Bytes.toBytes("distributed systems"));
            Assert.assertNotNull(val);
            // "distributed systems" is 19 characters long
            Assert.assertEquals("19", Bytes.toString(val));
        }
    });
}
Also used : ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TransactionExecutor(org.apache.tephra.TransactionExecutor) NamespaceId(co.cask.cdap.proto.id.NamespaceId) DatasetInstantiationException(co.cask.cdap.api.data.DatasetInstantiationException) Test(org.junit.Test)

Example 35 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testFailureInInit.

/**
 * Runs a program that is expected to fail and checks the value stored under {@code "initialized"}
 * in the {@code "recorder"} table afterwards.
 *
 * @param expected the value the {@code "initialized"} key is expected to hold after the run
 * @param app the deployed application containing the program
 * @param programClass the program to run
 * @param args runtime arguments passed to the program
 * @throws Exception propagated from the transactional subroutines or the program run
 */
private void testFailureInInit(final String expected, ApplicationWithPrograms app, Class<?> programClass, Map<String, String> args) throws Exception {
    // We want to verify that when a mapreduce fails during initialize(), especially
    // if an input or output format provider fails to produce its configuration, the
    // writes by that initialize() method are rolled back. (Background: prior to
    // CDAP-7476, the input/output format provider was called *after* initialize
    // returns, and therefore that transaction may have been committed already.)
    datasetCache.newTransactionContext();
    try {
        // (1) initialize the table with a known value
        final KeyValueTable kvTable = datasetCache.getDataset("recorder");
        Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() {
                // start from a known state: initialized=false
                kvTable.write("initialized", "false");
            }
        });
        // (2) run job
        runProgram(app, programClass, args, false);
        // (3) verify results
        Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() {
                // the table should hold the caller-supplied expected value after the run
                Assert.assertEquals(expected, Bytes.toString(kvTable.read("initialized")));
            }
        });
    } finally {
        // always dismiss the transaction context, even when the run or an assertion fails,
        // so it does not stay attached to the shared dataset cache
        datasetCache.dismissTransactionContext();
    }
}
Also used : KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TransactionExecutor(org.apache.tephra.TransactionExecutor)

Aggregations

KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 84; Test (org.junit.Test): 49; ApplicationManager (co.cask.cdap.test.ApplicationManager): 45; SparkManager (co.cask.cdap.test.SparkManager): 25; StreamManager (co.cask.cdap.test.StreamManager): 16; IOException (java.io.IOException): 16; TransactionExecutor (org.apache.tephra.TransactionExecutor): 12; ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 11; HashMap (java.util.HashMap): 11; ArrayList (java.util.ArrayList): 10; FileSet (co.cask.cdap.api.dataset.lib.FileSet): 8; KeyValue (co.cask.cdap.api.dataset.lib.KeyValue): 8; Table (co.cask.cdap.api.dataset.table.Table): 8; NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 8; ObjectStore (co.cask.cdap.api.dataset.lib.ObjectStore): 7; MapReduceManager (co.cask.cdap.test.MapReduceManager): 7; ServiceManager (co.cask.cdap.test.ServiceManager): 7; WorkflowManager (co.cask.cdap.test.WorkflowManager): 7; Set (java.util.Set): 7; Category (org.junit.experimental.categories.Category): 7