Search in sources :

Example 61 with DatasetId

use of co.cask.cdap.proto.id.DatasetId in project cdap by caskdata.

the class ExistingEntitySystemMetadataWriter method writeSystemMetadataForDatasets.

private void writeSystemMetadataForDatasets(NamespaceId namespace, DatasetFramework dsFramework) throws DatasetManagementException, IOException, NamespaceNotFoundException {
    SystemDatasetInstantiatorFactory systemDatasetInstantiatorFactory = new SystemDatasetInstantiatorFactory(locationFactory, dsFramework, cConf);
    try (SystemDatasetInstantiator systemDatasetInstantiator = systemDatasetInstantiatorFactory.create()) {
        for (DatasetSpecificationSummary summary : dsFramework.getInstances(namespace)) {
            final DatasetId dsInstance = namespace.dataset(summary.getName());
            DatasetProperties dsProperties = DatasetProperties.of(summary.getProperties());
            String dsType = summary.getType();
            Dataset dataset = null;
            try {
                try {
                    dataset = impersonator.doAs(dsInstance, new Callable<Dataset>() {

                        @Override
                        public Dataset call() throws Exception {
                            return systemDatasetInstantiator.getDataset(dsInstance);
                        }
                    });
                } catch (Exception e) {
                    LOG.warn("Exception while instantiating dataset {}", dsInstance, e);
                }
                SystemMetadataWriter writer = new DatasetSystemMetadataWriter(metadataStore, dsInstance, dsProperties, dataset, dsType, summary.getDescription());
                writer.write();
            } finally {
                if (dataset != null) {
                    dataset.close();
                }
            }
        }
    }
}
Also used : SystemDatasetInstantiatorFactory(co.cask.cdap.data.dataset.SystemDatasetInstantiatorFactory) DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) SystemDatasetInstantiator(co.cask.cdap.data.dataset.SystemDatasetInstantiator) Dataset(co.cask.cdap.api.dataset.Dataset) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) ProgramSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ProgramSystemMetadataWriter) ViewSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ViewSystemMetadataWriter) SystemMetadataWriter(co.cask.cdap.data2.metadata.system.SystemMetadataWriter) AppSystemMetadataWriter(co.cask.cdap.data2.metadata.system.AppSystemMetadataWriter) ArtifactSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ArtifactSystemMetadataWriter) StreamSystemMetadataWriter(co.cask.cdap.data2.metadata.system.StreamSystemMetadataWriter) DatasetSpecificationSummary(co.cask.cdap.proto.DatasetSpecificationSummary) Callable(java.util.concurrent.Callable) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) NamespaceNotFoundException(co.cask.cdap.common.NamespaceNotFoundException) IOException(java.io.IOException) DatasetId(co.cask.cdap.proto.id.DatasetId)

Example 62 with DatasetId

use of co.cask.cdap.proto.id.DatasetId in project cdap by caskdata.

the class AppMetadataStoreTest method testScanRunningInRangeWithBatch.

@Test
public void testScanRunningInRangeWithBatch() throws Exception {
    DatasetId storeTable = NamespaceId.DEFAULT.dataset("testScanRunningInRange");
    datasetFramework.addInstance(Table.class.getName(), storeTable, DatasetProperties.EMPTY);
    Table table = datasetFramework.getDataset(storeTable, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(table);
    final AppMetadataStore metadataStoreDataset = new AppMetadataStore(table, cConf, new AtomicBoolean(false));
    TransactionExecutor txnl = txExecutorFactory.createExecutor(Collections.singleton((TransactionAware) metadataStoreDataset));
    // Add some run records
    TreeSet<Long> expected = new TreeSet<>();
    for (int i = 0; i < 100; ++i) {
        ApplicationId application = NamespaceId.DEFAULT.app("app" + i);
        final ProgramId program = application.program(ProgramType.values()[i % ProgramType.values().length], "program" + i);
        final RunId runId = RunIds.generate((i + 1) * 10000);
        expected.add(RunIds.getTime(runId, TimeUnit.MILLISECONDS));
        // Start the program and stop it
        final int j = i;
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                metadataStoreDataset.recordProgramStart(program, runId.getId(), RunIds.getTime(runId, TimeUnit.SECONDS), null, null, null);
                metadataStoreDataset.recordProgramStop(program, runId.getId(), RunIds.getTime(runId, TimeUnit.SECONDS), ProgramRunStatus.values()[j % ProgramRunStatus.values().length], null);
            }
        });
    }
    // Run full scan
    runScan(txnl, metadataStoreDataset, expected, 0, Long.MAX_VALUE);
    // In all below assertions, TreeSet and metadataStore both have start time inclusive and end time exclusive.
    // Run the scan with time limit
    runScan(txnl, metadataStoreDataset, expected.subSet(30 * 10000L, 90 * 10000L), TimeUnit.MILLISECONDS.toSeconds(30 * 10000), TimeUnit.MILLISECONDS.toSeconds(90 * 10000));
    runScan(txnl, metadataStoreDataset, expected.subSet(90 * 10000L, 101 * 10000L), TimeUnit.MILLISECONDS.toSeconds(90 * 10000), TimeUnit.MILLISECONDS.toSeconds(101 * 10000));
    // After range
    runScan(txnl, metadataStoreDataset, expected.subSet(101 * 10000L, 200 * 10000L), TimeUnit.MILLISECONDS.toSeconds(101 * 10000), TimeUnit.MILLISECONDS.toSeconds(200 * 10000));
    // Identical start and end time
    runScan(txnl, metadataStoreDataset, expected.subSet(31 * 10000L, 31 * 10000L), TimeUnit.MILLISECONDS.toSeconds(31 * 10000), TimeUnit.MILLISECONDS.toSeconds(31 * 10000));
    // One unit difference between start and end time
    runScan(txnl, metadataStoreDataset, expected.subSet(30 * 10000L, 31 * 10000L), TimeUnit.MILLISECONDS.toSeconds(30 * 10000), TimeUnit.MILLISECONDS.toSeconds(31 * 10000));
    // Before range
    runScan(txnl, metadataStoreDataset, expected.subSet(1000L, 10000L), TimeUnit.MILLISECONDS.toSeconds(1000), TimeUnit.MILLISECONDS.toSeconds(10000));
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetId(co.cask.cdap.proto.id.DatasetId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransactionAware(org.apache.tephra.TransactionAware) TreeSet(java.util.TreeSet) ApplicationId(co.cask.cdap.proto.id.ApplicationId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)

Example 63 with DatasetId

use of co.cask.cdap.proto.id.DatasetId in project cdap by caskdata.

the class AppMetadataStoreTest method testgetRuns.

@Test
public void testgetRuns() throws Exception {
    DatasetId storeTable = NamespaceId.DEFAULT.dataset("testgetRuns");
    datasetFramework.addInstance(Table.class.getName(), storeTable, DatasetProperties.EMPTY);
    Table table = datasetFramework.getDataset(storeTable, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(table);
    final AppMetadataStore metadataStoreDataset = new AppMetadataStore(table, cConf, new AtomicBoolean(false));
    TransactionExecutor txnl = txExecutorFactory.createExecutor(Collections.singleton((TransactionAware) metadataStoreDataset));
    // Add some run records
    final Set<String> expected = new TreeSet<>();
    final Set<String> expectedHalf = new TreeSet<>();
    final Set<ProgramRunId> programRunIdSet = new HashSet<>();
    final Set<ProgramRunId> programRunIdSetHalf = new HashSet<>();
    for (int i = 0; i < 100; ++i) {
        ApplicationId application = NamespaceId.DEFAULT.app("app");
        final ProgramId program = application.program(ProgramType.FLOW, "program");
        final RunId runId = RunIds.generate((i + 1) * 10000);
        expected.add(runId.toString());
        final int index = i;
        // Add every other runId
        if ((i % 2) == 0) {
            expectedHalf.add(runId.toString());
        }
        ProgramRunId programRunId = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(), program.getProgram(), runId.toString());
        programRunIdSet.add(programRunId);
        //Add every other programRunId
        if ((i % 2) == 0) {
            programRunIdSetHalf.add(programRunId);
        }
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // Start the program and stop it
                metadataStoreDataset.recordProgramStart(program, runId.getId(), RunIds.getTime(runId, TimeUnit.SECONDS), null, null, null);
                metadataStoreDataset.recordProgramStop(program, runId.getId(), RunIds.getTime(runId, TimeUnit.SECONDS), ProgramRunStatus.values()[index % ProgramRunStatus.values().length], null);
            }
        });
    }
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Map<ProgramRunId, RunRecordMeta> runMap = metadataStoreDataset.getRuns(programRunIdSet);
            Set<String> actual = new TreeSet<>();
            for (Map.Entry<ProgramRunId, RunRecordMeta> entry : runMap.entrySet()) {
                actual.add(entry.getValue().getPid());
            }
            Assert.assertEquals(expected, actual);
            Map<ProgramRunId, RunRecordMeta> runMapHalf = metadataStoreDataset.getRuns(programRunIdSetHalf);
            Set<String> actualHalf = new TreeSet<>();
            for (Map.Entry<ProgramRunId, RunRecordMeta> entry : runMapHalf.entrySet()) {
                actualHalf.add(entry.getValue().getPid());
            }
            Assert.assertEquals(expectedHalf, actualHalf);
        }
    });
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetId(co.cask.cdap.proto.id.DatasetId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransactionAware(org.apache.tephra.TransactionAware) TreeSet(java.util.TreeSet) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 64 with DatasetId

use of co.cask.cdap.proto.id.DatasetId in project cdap by caskdata.

the class RemoteLineageWriterTest method testSimpleCase.

@Test
public void testSimpleCase() {
    long now = System.currentTimeMillis();
    ApplicationId appId = NamespaceId.DEFAULT.app("test_app");
    ProgramId flowId = appId.flow("test_flow");
    ProgramRunId runId = flowId.run(RunIds.generate(now).getId());
    RunId twillRunId = RunIds.fromString(runId.getRun());
    DatasetId datasetId = NamespaceId.DEFAULT.dataset("test_dataset");
    StreamId streamId = NamespaceId.DEFAULT.stream("test_stream");
    Set<Relation> expectedRelations = new HashSet<>();
    // test null serialization
    remoteLineageWriter.addAccess(runId, datasetId, AccessType.READ, null);
    expectedRelations.add(new Relation(datasetId, flowId, AccessType.READ, twillRunId));
    Assert.assertEquals(ImmutableSet.of(flowId, datasetId), lineageStore.getEntitiesForRun(runId));
    Assert.assertEquals(expectedRelations, lineageStore.getRelations(flowId, now, now + 1, Predicates.<Relation>alwaysTrue()));
    remoteLineageWriter.addAccess(runId, streamId, AccessType.READ);
    expectedRelations.add(new Relation(streamId, flowId, AccessType.READ, twillRunId));
    Assert.assertEquals(expectedRelations, lineageStore.getRelations(flowId, now, now + 1, Predicates.<Relation>alwaysTrue()));
    remoteLineageWriter.addAccess(runId, streamId, AccessType.WRITE);
    expectedRelations.add(new Relation(streamId, flowId, AccessType.WRITE, twillRunId));
    Assert.assertEquals(expectedRelations, lineageStore.getRelations(flowId, now, now + 1, Predicates.<Relation>alwaysTrue()));
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Relation(co.cask.cdap.data2.metadata.lineage.Relation) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramId(co.cask.cdap.proto.id.ProgramId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) DatasetId(co.cask.cdap.proto.id.DatasetId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 65 with DatasetId

use of co.cask.cdap.proto.id.DatasetId in project cdap by caskdata.

the class SetDatasetInstancePropertiesCommand method perform.

@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
    DatasetId instance = cliConfig.getCurrentNamespace().dataset(arguments.get(ArgumentName.DATASET.toString()));
    Map<String, String> properties = ArgumentParser.parseMap(arguments.get(ArgumentName.DATASET_PROPERTIES.toString()), ArgumentName.DATASET_PROPERTIES.toString());
    datasetClient.updateExisting(instance, properties);
    output.printf("Successfully updated properties for dataset instance '%s' to %s", instance.getEntityName(), GSON.toJson(properties));
}
Also used : DatasetId(co.cask.cdap.proto.id.DatasetId)

Aggregations

DatasetId (co.cask.cdap.proto.id.DatasetId)180 Test (org.junit.Test)96 ProgramId (co.cask.cdap.proto.id.ProgramId)34 StreamId (co.cask.cdap.proto.id.StreamId)34 Path (javax.ws.rs.Path)34 TransactionExecutor (org.apache.tephra.TransactionExecutor)31 NamespaceId (co.cask.cdap.proto.id.NamespaceId)25 ApplicationId (co.cask.cdap.proto.id.ApplicationId)23 IOException (java.io.IOException)19 POST (javax.ws.rs.POST)17 TransactionFailureException (org.apache.tephra.TransactionFailureException)17 DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification)16 QueryResult (co.cask.cdap.proto.QueryResult)16 DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)15 ColumnDesc (co.cask.cdap.proto.ColumnDesc)14 Map (java.util.Map)13 NoSuchElementException (java.util.NoSuchElementException)13 Table (co.cask.cdap.api.dataset.table.Table)12 NamespaceMeta (co.cask.cdap.proto.NamespaceMeta)11 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)11