
Example 31 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio, from the class DynamicPartitionerWithAvroTest, method runDynamicPartitionerMR:

private void runDynamicPartitionerMR(final List<? extends GenericRecord> records, boolean allowConcurrentWriters, final boolean precreatePartitions, @Nullable final DynamicPartitioner.PartitionWriteOption partitionWriteOption, boolean expectedStatus) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
    final long now = System.currentTimeMillis();
    final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);
    // write values to the input kvTable
    final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
    Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // the keys themselves are not used, but they must be unique
            for (int i = 0; i < records.size(); i++) {
                kvTable.write(Integer.toString(i), records.get(i).toString());
            }
        }
    });
    final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
    if (precreatePartitions) {
        Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws IOException {
                writeFile(pfs, createKey(now, 95111));
                writeFile(pfs, createKey(now, 98123));
                writeFile(pfs, createKey(now, 84125));
            }
        });
    }
    String allowConcurrencyKey = "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
    // run the partition writer m/r with this output partition time
    Map<String, String> arguments = new HashMap<>();
    arguments.put(OUTPUT_PARTITION_KEY, Long.toString(now));
    arguments.put(allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
    if (partitionWriteOption != null) {
        arguments.put("partitionWriteOption", partitionWriteOption.name());
    }
    long startTime = System.currentTimeMillis();
    boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class, new BasicArguments(arguments));
    Assert.assertEquals(expectedStatus, status);
    if (!expectedStatus) {
        // if we expect the program to fail, no need to check the output data for expected results
        return;
    }
    // Verify notifications
    List<Notification> notifications = getDataNotifications(startTime);
    Assert.assertEquals(1, notifications.size());
    Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET), DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));
    // this should have created partitions in the pfs
    final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws IOException {
            Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
            for (PartitionDetail partition : pfs.getPartitions(null)) {
                partitions.put(partition.getPartitionKey(), partition);
                // check that the mapreduce wrote the output partition metadata to all the output partitions
                Assert.assertEquals(getExpectedMetadata(precreatePartitions, partitionWriteOption), partition.getMetadata().asMap());
                // if files were precreated, and the option is to append, expect the empty file to exist
                // if partition write option is configured to overwrite, then the file is expected to not exist
                Location preexistingFile = partition.getLocation().append("file");
                if (precreatePartitions && partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) {
                    Assert.assertTrue(preexistingFile.exists());
                    try (InputStream inputStream = preexistingFile.getInputStream()) {
                        Assert.assertEquals(-1, inputStream.read());
                    }
                } else {
                    Assert.assertFalse(preexistingFile.exists());
                }
            }
            Assert.assertEquals(3, partitions.size());
            Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());
            // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
            for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
                PartitionDetail partitionDetail = partitionKeyEntry.getValue();
                String relativePath = partitionDetail.getRelativePath();
                int zip = (int) partitionKeyEntry.getKey().getField("zip");
                Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
                Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
            }
            for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
                Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
                Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
            }
        }
    });
}
Also used : HashSet(java.util.HashSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) HashMap(java.util.HashMap) PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail) Notification(io.cdap.cdap.proto.Notification) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) GenericRecord(org.apache.avro.generic.GenericRecord) InputStream(java.io.InputStream) TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Location(org.apache.twill.filesystem.Location)
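
Tephra's TransactionExecutor.Subroutine is a single-method interface, so the anonymous classes above can be condensed to lambdas on Java 8+. A minimal sketch of the input-writing step, assuming the same txExecutorFactory, kvTable, and records as in the test:

Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(() -> {
    // keys only need to be unique; the MapReduce ignores them
    for (int i = 0; i < records.size(); i++) {
        kvTable.write(Integer.toString(i), records.get(i).toString());
    }
});

Because the lambda body never returns a value, it resolves unambiguously to the Subroutine overload of execute rather than the Callable overload.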

Example 32 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio, from the class CoreDatasetsModule, method register:

@Override
public void register(DatasetDefinitionRegistry registry) {
    DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
    DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = new KeyValueTableDefinition(KeyValueTable.TYPE, tableDef);
    registry.add(kvTableDef);
    registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));
    DatasetDefinition<ObjectStore, DatasetAdmin> objectStoreDef = new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef);
    registry.add(new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef));
    registry.add(new ObjectStoreDefinition(ObjectStore.class.getName(), kvTableDef));
    registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.TYPE, tableDef, objectStoreDef));
    registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.class.getName(), tableDef, objectStoreDef));
    registry.add(new IndexedTableDefinition(IndexedTable.TYPE, tableDef));
    registry.add(new IndexedTableDefinition(IndexedTable.class.getName(), tableDef));
    registry.add(new TimeseriesTableDefinition(TimeseriesTable.TYPE, tableDef));
    registry.add(new TimeseriesTableDefinition(TimeseriesTable.class.getName(), tableDef));
    registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.TYPE, tableDef));
    registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.class.getName(), tableDef));
    // in-memory table
    registry.add(new InMemoryTableDefinition(InMemoryTable.TYPE));
}
Also used : ObjectStore(io.cdap.cdap.api.dataset.lib.ObjectStore) IndexedObjectStore(io.cdap.cdap.api.dataset.lib.IndexedObjectStore) Table(io.cdap.cdap.api.dataset.table.Table) InMemoryTable(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryTable) CounterTimeseriesTable(io.cdap.cdap.api.dataset.lib.CounterTimeseriesTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) IndexedTable(io.cdap.cdap.api.dataset.lib.IndexedTable) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) InMemoryTableDefinition(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryTableDefinition) CounterTimeseriesTableDefinition(io.cdap.cdap.api.dataset.lib.CounterTimeseriesTableDefinition) IndexedTableDefinition(io.cdap.cdap.api.dataset.lib.IndexedTableDefinition) IndexedObjectStoreDefinition(io.cdap.cdap.api.dataset.lib.IndexedObjectStoreDefinition) KeyValueTableDefinition(io.cdap.cdap.api.dataset.lib.KeyValueTableDefinition) TimeseriesTableDefinition(io.cdap.cdap.api.dataset.lib.TimeseriesTableDefinition)
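
Registering each definition under both its TYPE constant and its fully qualified class name, as CoreDatasetsModule does, lets instances be created by either name. A minimal sketch of a custom module that exposes only KeyValueTable, assuming a "table" definition has already been registered by a core table module; the module class name here is hypothetical:

import io.cdap.cdap.api.dataset.DatasetAdmin;
import io.cdap.cdap.api.dataset.DatasetDefinition;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.api.dataset.lib.KeyValueTableDefinition;
import io.cdap.cdap.api.dataset.module.DatasetDefinitionRegistry;
import io.cdap.cdap.api.dataset.module.DatasetModule;
import io.cdap.cdap.api.dataset.table.Table;

public class KeyValueTableOnlyModule implements DatasetModule {

    @Override
    public void register(DatasetDefinitionRegistry registry) {
        // reuse the underlying "table" definition this module depends on
        DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
        // register under the TYPE constant and the class name, mirroring CoreDatasetsModule
        registry.add(new KeyValueTableDefinition(KeyValueTable.TYPE, tableDef));
        registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));
    }
}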

Example 33 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio, from the class TestFrameworkTestRun, method testAppWithPlugin:

@Test
public void testAppWithPlugin() throws Exception {
    ArtifactId artifactId = NamespaceId.DEFAULT.artifact("app-with-plugin", "1.0.0-SNAPSHOT");
    addAppArtifact(artifactId, AppWithPlugin.class);
    ArtifactId pluginArtifactId = NamespaceId.DEFAULT.artifact("test-plugin", "1.0.0-SNAPSHOT");
    addPluginArtifact(pluginArtifactId, artifactId, ToStringPlugin.class);
    ApplicationId appId = NamespaceId.DEFAULT.app("AppWithPlugin");
    AppRequest createRequest = new AppRequest(new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()));
    ApplicationManager appManager = deployApplication(appId, createRequest);
    final WorkerManager workerManager = appManager.getWorkerManager(AppWithPlugin.WORKER);
    workerManager.start();
    workerManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.SECONDS);
    final ServiceManager serviceManager = appManager.getServiceManager(AppWithPlugin.SERVICE);
    serviceManager.start();
    serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    URL serviceURL = serviceManager.getServiceURL(5, TimeUnit.SECONDS);
    callServiceGet(serviceURL, "dummy");
    serviceManager.stop();
    serviceManager.waitForStopped(10, TimeUnit.SECONDS);
    WorkflowManager workflowManager = appManager.getWorkflowManager(AppWithPlugin.WORKFLOW);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    List<RunRecord> runRecords = workflowManager.getHistory();
    Assert.assertNotEquals(ProgramRunStatus.FAILED, runRecords.get(0).getStatus());
    DataSetManager<KeyValueTable> workflowTableManager = getDataset(AppWithPlugin.WORKFLOW_TABLE);
    String value = Bytes.toString(workflowTableManager.get().read("val"));
    Assert.assertEquals(AppWithPlugin.TEST, value);
    Map<String, String> workflowTags = ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(), Constants.Metrics.Tag.APP, "AppWithPlugin", Constants.Metrics.Tag.WORKFLOW, AppWithPlugin.WORKFLOW, Constants.Metrics.Tag.RUN_ID, runRecords.get(0).getPid());
    getMetricsManager().waitForTotalMetricCount(workflowTags, String.format("user.destroy.%s", AppWithPlugin.WORKFLOW), 1, 60, TimeUnit.SECONDS);
    // Testing Spark plugins. First, send some data to the fileset for the Spark program to process
    DataSetManager<FileSet> fileSetManager = getDataset(AppWithPlugin.SPARK_INPUT);
    FileSet fileSet = fileSetManager.get();
    try (PrintStream out = new PrintStream(fileSet.getLocation("input").append("file.txt").getOutputStream(), true, "UTF-8")) {
        for (int i = 0; i < 5; i++) {
            out.println("Message " + i);
        }
    }
    Map<String, String> sparkArgs = new HashMap<>();
    FileSetArguments.setInputPath(sparkArgs, "input");
    SparkManager sparkManager = appManager.getSparkManager(AppWithPlugin.SPARK).start(sparkArgs);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    // Verify the Spark result.
    DataSetManager<Table> dataSetManager = getDataset(AppWithPlugin.SPARK_TABLE);
    Table table = dataSetManager.get();
    try (Scanner scanner = table.scan(null, null)) {
        for (int i = 0; i < 5; i++) {
            Row row = scanner.next();
            Assert.assertNotNull(row);
            String expected = "Message " + i + " " + AppWithPlugin.TEST;
            Assert.assertEquals(expected, Bytes.toString(row.getRow()));
            Assert.assertEquals(expected, Bytes.toString(row.get(expected)));
        }
        // There shouldn't be any more rows in the table.
        Assert.assertNull(scanner.next());
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactId(io.cdap.cdap.proto.id.ArtifactId) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) URL(java.net.URL) ServiceManager(io.cdap.cdap.test.ServiceManager) PrintStream(java.io.PrintStream) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) WorkerManager(io.cdap.cdap.test.WorkerManager) RunRecord(io.cdap.cdap.proto.RunRecord) ArtifactSummary(io.cdap.cdap.api.artifact.ArtifactSummary) Row(io.cdap.cdap.api.dataset.table.Row) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)
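
One detail worth noting about the DataSetManager reads above: the manager hands out a transactional view, and flush() is how a test picks up writes committed after the view was obtained. A minimal sketch, assuming the same workflowTableManager as in the test and that another program run has since updated the table:

// flush() refreshes the manager's view so later commits become visible
workflowTableManager.flush();
byte[] updated = workflowTableManager.get().read("val");
Assert.assertNotNull(updated);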

Example 34 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio, from the class TestFrameworkTestRun, method testByteCodeClassLoader:

@Category(XSlowTests.class)
@Test
public void testByteCodeClassLoader() throws Exception {
    // This test verifies classloading of bytecode-generated classes
    ApplicationManager appManager = deployApplication(testSpace, ClassLoaderTestApp.class);
    ServiceManager serviceManager = appManager.getServiceManager("RecordHandler").start();
    URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);
    Assert.assertNotNull(serviceURL);
    // Increment record
    URL url = new URL(serviceURL, "increment/public");
    for (int i = 0; i < 10; i++) {
        HttpResponse response = executeHttp(HttpRequest.post(url).build());
        Assert.assertEquals(200, response.getResponseCode());
    }
    // Query record
    url = new URL(serviceURL, "query?type=public");
    HttpRequest request = HttpRequest.get(url).build();
    HttpResponse response = executeHttp(request);
    Assert.assertEquals(200, response.getResponseCode());
    long count = Long.parseLong(response.getResponseBodyAsString());
    serviceManager.stop();
    // Verify the record count with dataset
    DataSetManager<KeyValueTable> recordsManager = getDataset(testSpace.dataset("records"));
    KeyValueTable records = recordsManager.get();
    Assert.assertEquals(count, Bytes.toLong(records.read("PUBLIC")));
}
Also used : HttpRequest(io.cdap.common.http.HttpRequest) ApplicationManager(io.cdap.cdap.test.ApplicationManager) ServiceManager(io.cdap.cdap.test.ServiceManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) HttpResponse(io.cdap.common.http.HttpResponse) URL(java.net.URL) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
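
The count that the test reads back with Bytes.toLong is the kind of value KeyValueTable.increment maintains: an 8-byte long stored under the key. A hedged sketch of what the handler side could look like, using standard CDAP service-handler APIs; the class below is hypothetical, not the actual ClassLoaderTestApp handler:

import io.cdap.cdap.api.annotation.UseDataSet;
import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.api.service.http.AbstractHttpServiceHandler;
import io.cdap.cdap.api.service.http.HttpServiceRequest;
import io.cdap.cdap.api.service.http.HttpServiceResponder;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;

public class RecordHandlerSketch extends AbstractHttpServiceHandler {

    @UseDataSet("records")
    private KeyValueTable records;

    @POST
    @Path("increment/{type}")
    public void increment(HttpServiceRequest request, HttpServiceResponder responder,
                          @PathParam("type") String type) {
        // increment() interprets the stored value as an 8-byte long counter;
        // upper-casing matches the test reading back "PUBLIC" for "public", but is an assumption
        records.increment(Bytes.toBytes(type.toUpperCase()), 1L);
        responder.sendStatus(200);
    }
}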

Example 35 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio, from the class TestFrameworkTestRun, method testCustomActionDatasetAccess:

@Category(SlowTests.class)
@Test
public void testCustomActionDatasetAccess() throws Exception {
    addDatasetInstance("keyValueTable", DatasetWithCustomActionApp.CUSTOM_TABLE);
    addDatasetInstance("fileSet", DatasetWithCustomActionApp.CUSTOM_FILESET);
    ApplicationManager appManager = deployApplication(DatasetWithCustomActionApp.class);
    ServiceManager serviceManager = appManager.getServiceManager(DatasetWithCustomActionApp.CUSTOM_SERVICE).start();
    serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    WorkflowManager workflowManager = appManager.getWorkflowManager(DatasetWithCustomActionApp.CUSTOM_WORKFLOW).start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    appManager.stopAll();
    DataSetManager<KeyValueTable> outTableManager = getDataset(DatasetWithCustomActionApp.CUSTOM_TABLE);
    KeyValueTable outputTable = outTableManager.get();
    Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
    Assert.assertEquals("service", Bytes.toString(outputTable.read("hi")));
    Assert.assertEquals("another.world", Bytes.toString(outputTable.read("another.hello")));
    DataSetManager<FileSet> outFileSetManager = getDataset(DatasetWithCustomActionApp.CUSTOM_FILESET);
    FileSet fs = outFileSetManager.get();
    try (InputStream in = fs.getLocation("test").getInputStream()) {
        Assert.assertEquals(42, in.read());
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) ServiceManager(io.cdap.cdap.test.ServiceManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) InputStream(java.io.InputStream) WorkflowManager(io.cdap.cdap.test.WorkflowManager) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
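
The two-argument addDatasetInstance calls above create instances with default properties; an overload also accepts a DatasetProperties object for customization. A minimal sketch, assuming a TestBase subclass ("myTable", "myFiles", and the base path are hypothetical):

// "keyValueTable" and "fileSet" are the built-in type names used in the test above
addDatasetInstance("keyValueTable", "myTable");
addDatasetInstance("fileSet", "myFiles", FileSetProperties.builder()
    .setBasePath("custom/base/path")
    .build());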

Aggregations

KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 122 usages
Test (org.junit.Test): 65 usages
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 59 usages
HashMap (java.util.HashMap): 27 usages
SparkManager (io.cdap.cdap.test.SparkManager): 26 usages
Table (io.cdap.cdap.api.dataset.table.Table): 21 usages
TransactionExecutor (org.apache.tephra.TransactionExecutor): 20 usages
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 19 usages
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 18 usages
ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 18 usages
KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue): 14 usages
ServiceManager (io.cdap.cdap.test.ServiceManager): 14 usages
IOException (java.io.IOException): 14 usages
ArrayList (java.util.ArrayList): 14 usages
Location (org.apache.twill.filesystem.Location): 14 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 13 usages
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13 usages
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 13 usages
File (java.io.File): 13 usages
URL (java.net.URL): 12 usages