Search in sources:

Example 1 with DatasetSystemMetadataWriter

use of co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter in project cdap by caskdata.

The method writeSystemMetadata of the class DatasetAdminService.

/**
 * Writes system metadata for a dataset instance, but only when it is a user dataset.
 *
 * <p>The dataset is instantiated (impersonating {@code ugi}) so that it can be handed to the
 * metadata writer. If instantiation fails, the failure is logged as a warning and the metadata
 * is still written, with a {@code null} dataset passed to the writer.
 *
 * @param datasetInstanceId id of the dataset instance the metadata belongs to
 * @param spec specification used to instantiate the dataset and to obtain its description
 * @param props dataset properties passed to the metadata writer
 * @param typeMeta type metadata; its name is passed to the metadata writer
 * @param type dataset type used to instantiate the dataset
 * @param context dataset context used to instantiate the dataset
 * @param existing if {@code false}, the current time is passed to the writer as the creation time;
 *                 if {@code true}, no creation time is passed
 * @param ugi user to impersonate while instantiating the dataset
 * @throws IOException declared by this method; the visible call that can raise it is closing the
 *                     instantiated dataset
 */
private void writeSystemMetadata(DatasetId datasetInstanceId, final DatasetSpecification spec, DatasetProperties props, final DatasetTypeMeta typeMeta, final DatasetType type, final DatasetContext context, boolean existing, UserGroupInformation ugi) throws IOException {
    // System metadata is only recorded for user datasets; anything else is skipped.
    if (!DatasetsUtil.isUserDataset(datasetInstanceId)) {
        return;
    }
    Dataset instance = null;
    try {
        try {
            instance = ImpersonationUtils.doAs(ugi, () -> type.getDataset(context, spec, DatasetDefinition.NO_ARGUMENTS));
        } catch (Exception e) {
            // Best effort: fall through with a null instance so the remaining metadata is still written.
            LOG.warn("Exception while instantiating Dataset {}", datasetInstanceId, e);
        }
        // A newly created dataset additionally gets the current time as its creation time.
        SystemMetadataWriter writer = existing
            ? new DatasetSystemMetadataWriter(metadataStore, datasetInstanceId, props, instance, typeMeta.getName(), spec.getDescription())
            : new DatasetSystemMetadataWriter(metadataStore, datasetInstanceId, props, System.currentTimeMillis(), instance, typeMeta.getName(), spec.getDescription());
        writer.write();
    } finally {
        if (instance != null) {
            instance.close();
        }
    }
}
Also used : DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) Dataset(co.cask.cdap.api.dataset.Dataset) DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) SystemMetadataWriter(co.cask.cdap.data2.metadata.system.SystemMetadataWriter) IncompatibleUpdateException(co.cask.cdap.api.dataset.IncompatibleUpdateException) IOException(java.io.IOException) NotFoundException(co.cask.cdap.common.NotFoundException) BadRequestException(co.cask.cdap.common.BadRequestException)

Example 2 with DatasetSystemMetadataWriter

use of co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter in project cdap by caskdata.

The method testDataSetsAreClosed of the class OpenCloseDataSetTest.

/**
 * Verifies that the tracking tables used by {@code DummyAppWithTrackingTable} are closed as many
 * times as they are opened.
 *
 * <p>The test runs in three phases:
 * <ol>
 *   <li>Starts every non-MapReduce program, enqueues four stream events in a transaction and waits
 *       until four writes to table "foo" have been tracked.</li>
 *   <li>Calls the "DummyService" over HTTP and checks the tracked read/open/close counts.</li>
 *   <li>Stops the programs, runs the MapReduce program to completion and checks that the open and
 *       close counts match for both "foo" and "bar".</li>
 * </ol>
 *
 * <p>The waiting loops have no bound of their own; the JUnit timeout on this method is what ends
 * the test if the expected state is never reached.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test(timeout = 120000)
public void testDataSetsAreClosed() throws Exception {
    final String tableName = "foo";
    TrackingTable.resetTracker();
    ApplicationWithPrograms app = AppFabricTestHelper.deployApplicationWithManager(DummyAppWithTrackingTable.class, TEMP_FOLDER_SUPPLIER);
    List<ProgramController> controllers = Lists.newArrayList();
    // start the programs (the m/r job is started separately, later in the test)
    for (ProgramDescriptor programDescriptor : app.getPrograms()) {
        if (programDescriptor.getProgramId().getType().equals(ProgramType.MAPREDUCE)) {
            continue;
        }
        controllers.add(AppFabricTestHelper.submit(app, programDescriptor.getSpecification().getClassName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER));
    }
    // write some data to queue
    TransactionSystemClient txSystemClient = AppFabricTestHelper.getInjector().getInstance(TransactionSystemClient.class);
    QueueName queueName = QueueName.fromStream(app.getApplicationId().getNamespace(), "xx");
    QueueClientFactory queueClientFactory = AppFabricTestHelper.getInjector().getInstance(QueueClientFactory.class);
    QueueProducer producer = queueClientFactory.createProducer(queueName);
    // start tx to write in queue in tx
    Transaction tx = txSystemClient.startShort();
    ((TransactionAware) producer).startTx(tx);
    StreamEventCodec codec = new StreamEventCodec();
    for (int i = 0; i < 4; i++) {
        String msg = "x" + i;
        StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), ByteBuffer.wrap(msg.getBytes(Charsets.UTF_8)));
        producer.enqueue(new QueueEntry(codec.encodePayload(event)));
    }
    // commit tx
    ((TransactionAware) producer).commitTx();
    txSystemClient.commitOrThrow(tx);
    // wait until all four events have been written to the table; bounded by the test timeout
    while (TrackingTable.getTracker(tableName, "write") < 4) {
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // get the number of writes to the foo table
    Assert.assertEquals(4, TrackingTable.getTracker(tableName, "write"));
    // only 2 "open" calls should be tracked:
    // 1. the flow has started with single flowlet (service is loaded lazily on 1st request)
    // 2. DatasetSystemMetadataWriter also instantiates the dataset because it needs to add some system tags
    // for the dataset
    Assert.assertEquals(2, TrackingTable.getTracker(tableName, "open"));
    // now send a request to the service
    Gson gson = new Gson();
    DiscoveryServiceClient discoveryServiceClient = AppFabricTestHelper.getInjector().getInstance(DiscoveryServiceClient.class);
    Discoverable discoverable = new RandomEndpointStrategy(discoveryServiceClient.discover(String.format("service.%s.%s.%s", DefaultId.NAMESPACE.getEntityName(), "dummy", "DummyService"))).pick(5, TimeUnit.SECONDS);
    Assert.assertNotNull(discoverable);
    HttpClient client = new DefaultHttpClient();
    String responseContent;
    try {
        HttpGet get = new HttpGet(String.format("http://%s:%d/v3/namespaces/default/apps/%s/services/%s/methods/%s", discoverable.getSocketAddress().getHostName(), discoverable.getSocketAddress().getPort(), "dummy", "DummyService", "x1"));
        HttpResponse response = client.execute(get);
        responseContent = gson.fromJson(new InputStreamReader(response.getEntity().getContent(), Charsets.UTF_8), String.class);
    } finally {
        // release the connections even if the request or the response parsing fails
        client.getConnectionManager().shutdown();
    }
    Assert.assertEquals("x1", responseContent);
    // now the dataset must have a read and another open operation
    Assert.assertEquals(1, TrackingTable.getTracker(tableName, "read"));
    Assert.assertEquals(3, TrackingTable.getTracker(tableName, "open"));
    // The dataset that was instantiated by the DatasetSystemMetadataWriter should have been closed
    Assert.assertEquals(1, TrackingTable.getTracker(tableName, "close"));
    // stop all programs, they should both close the data set foo
    for (ProgramController controller : controllers) {
        controller.stop().get();
    }
    int timesOpened = TrackingTable.getTracker(tableName, "open");
    Assert.assertTrue(timesOpened >= 2);
    Assert.assertEquals(timesOpened, TrackingTable.getTracker(tableName, "close"));
    // now start the m/r job
    ProgramController controller = null;
    for (ProgramDescriptor programDescriptor : app.getPrograms()) {
        if (programDescriptor.getProgramId().getType().equals(ProgramType.MAPREDUCE)) {
            controller = AppFabricTestHelper.submit(app, programDescriptor.getSpecification().getClassName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER);
        }
    }
    Assert.assertNotNull(controller);
    // wait for the m/r job to complete; bounded by the test timeout
    while (!controller.getState().equals(ProgramController.State.COMPLETED)) {
        TimeUnit.MILLISECONDS.sleep(100);
    }
    // M/r job is done, one mapper and the m/r client should have opened and closed the data set foo
    // we don't know the exact number of times opened, but it is at least once, and it must be closed the same number
    // of times.
    Assert.assertTrue(timesOpened < TrackingTable.getTracker(tableName, "open"));
    Assert.assertEquals(TrackingTable.getTracker(tableName, "open"), TrackingTable.getTracker(tableName, "close"));
    Assert.assertTrue(0 < TrackingTable.getTracker("bar", "open"));
    Assert.assertEquals(TrackingTable.getTracker("bar", "open"), TrackingTable.getTracker("bar", "close"));
}
Also used : DiscoveryServiceClient(org.apache.twill.discovery.DiscoveryServiceClient) HttpGet(org.apache.http.client.methods.HttpGet) Gson(com.google.gson.Gson) DefaultHttpClient(org.apache.http.impl.client.DefaultHttpClient) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) StreamEventCodec(co.cask.cdap.common.stream.StreamEventCodec) QueueProducer(co.cask.cdap.data2.queue.QueueProducer) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) ProgramDescriptor(co.cask.cdap.app.program.ProgramDescriptor) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) QueueName(co.cask.cdap.common.queue.QueueName) ProgramController(co.cask.cdap.app.runtime.ProgramController) Discoverable(org.apache.twill.discovery.Discoverable) InputStreamReader(java.io.InputStreamReader) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) HttpResponse(org.apache.http.HttpResponse) QueueEntry(co.cask.cdap.data2.queue.QueueEntry) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DefaultHttpClient(org.apache.http.impl.client.DefaultHttpClient) HttpClient(org.apache.http.client.HttpClient) QueueClientFactory(co.cask.cdap.data2.queue.QueueClientFactory) RandomEndpointStrategy(co.cask.cdap.common.discovery.RandomEndpointStrategy) Test(org.junit.Test)

Example 3 with DatasetSystemMetadataWriter

use of co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter in project cdap by caskdata.

The method writeSystemMetadataForDatasets of the class ExistingEntitySystemMetadataWriter.

/**
 * Writes system metadata for every dataset instance that the dataset framework reports for the
 * given namespace.
 *
 * <p>Each dataset is instantiated through the impersonator so it can be handed to the metadata
 * writer. If instantiation fails, the failure is logged as a warning and the metadata is still
 * written, with a {@code null} dataset passed to the writer. Any dataset that was instantiated is
 * closed after its metadata has been written.
 *
 * @param namespace namespace whose dataset instances are processed
 * @param dsFramework framework used to list the instances and to build the dataset instantiator
 * @throws DatasetManagementException declared by this method; its source is not visible in this block
 * @throws IOException declared by this method; closing an instantiated dataset is one visible
 *                     call that can raise it
 * @throws NamespaceNotFoundException declared by this method; its source is not visible in this block
 */
private void writeSystemMetadataForDatasets(NamespaceId namespace, DatasetFramework dsFramework) throws DatasetManagementException, IOException, NamespaceNotFoundException {
    SystemDatasetInstantiatorFactory instantiatorFactory = new SystemDatasetInstantiatorFactory(locationFactory, dsFramework, cConf);
    try (SystemDatasetInstantiator instantiator = instantiatorFactory.create()) {
        for (DatasetSpecificationSummary summary : dsFramework.getInstances(namespace)) {
            final DatasetId datasetId = namespace.dataset(summary.getName());
            DatasetProperties properties = DatasetProperties.of(summary.getProperties());
            String typeName = summary.getType();
            // Deferred instantiation of the dataset; executed by the impersonator below.
            Callable<Dataset> opener = new Callable<Dataset>() {

                @Override
                public Dataset call() throws Exception {
                    return instantiator.getDataset(datasetId);
                }
            };
            Dataset opened = null;
            try {
                try {
                    opened = impersonator.doAs(datasetId, opener);
                } catch (Exception e) {
                    // Best effort: continue with a null dataset so the remaining metadata is still written.
                    LOG.warn("Exception while instantiating dataset {}", datasetId, e);
                }
                new DatasetSystemMetadataWriter(metadataStore, datasetId, properties, opened, typeName, summary.getDescription()).write();
            } finally {
                if (opened != null) {
                    opened.close();
                }
            }
        }
    }
}
Also used : SystemDatasetInstantiatorFactory(co.cask.cdap.data.dataset.SystemDatasetInstantiatorFactory) DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) SystemDatasetInstantiator(co.cask.cdap.data.dataset.SystemDatasetInstantiator) Dataset(co.cask.cdap.api.dataset.Dataset) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) DatasetSystemMetadataWriter(co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter) ProgramSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ProgramSystemMetadataWriter) ViewSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ViewSystemMetadataWriter) SystemMetadataWriter(co.cask.cdap.data2.metadata.system.SystemMetadataWriter) AppSystemMetadataWriter(co.cask.cdap.data2.metadata.system.AppSystemMetadataWriter) ArtifactSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ArtifactSystemMetadataWriter) StreamSystemMetadataWriter(co.cask.cdap.data2.metadata.system.StreamSystemMetadataWriter) DatasetSpecificationSummary(co.cask.cdap.proto.DatasetSpecificationSummary) Callable(java.util.concurrent.Callable) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) NamespaceNotFoundException(co.cask.cdap.common.NamespaceNotFoundException) IOException(java.io.IOException) DatasetId(co.cask.cdap.proto.id.DatasetId)

Example 4 with DatasetSystemMetadataWriter

use of co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter in project cdap by caskdata.

The method assertDatasetSchema of the class DatasetSystemMetadataWriterTest.

/**
 * Asserts that a {@link DatasetSystemMetadataWriter} built from the given properties reports the
 * expected schema from {@code getSchemaToAdd()}.
 *
 * @param expected the schema the writer is expected to report
 * @param properties dataset properties handed to the writer
 */
private void assertDatasetSchema(String expected, DatasetProperties properties) {
    NoOpMetadataStore store = new NoOpMetadataStore();
    DatasetId datasetId = new DatasetId("ns1", "avro1");
    // The three trailing constructor arguments are all passed as null, so only the properties vary between calls.
    DatasetSystemMetadataWriter writer = new DatasetSystemMetadataWriter(store, datasetId, properties, null, null, null);
    Assert.assertEquals(expected, writer.getSchemaToAdd());
}
Also used : NoOpMetadataStore(co.cask.cdap.data2.metadata.store.NoOpMetadataStore) DatasetId(co.cask.cdap.proto.id.DatasetId)

Aggregations

Dataset (co.cask.cdap.api.dataset.Dataset)2 DatasetSystemMetadataWriter (co.cask.cdap.data2.metadata.system.DatasetSystemMetadataWriter)2 SystemMetadataWriter (co.cask.cdap.data2.metadata.system.SystemMetadataWriter)2 DatasetId (co.cask.cdap.proto.id.DatasetId)2 IOException (java.io.IOException)2 DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)1 DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties)1 IncompatibleUpdateException (co.cask.cdap.api.dataset.IncompatibleUpdateException)1 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)1 ProgramDescriptor (co.cask.cdap.app.program.ProgramDescriptor)1 ProgramController (co.cask.cdap.app.runtime.ProgramController)1 BadRequestException (co.cask.cdap.common.BadRequestException)1 NamespaceNotFoundException (co.cask.cdap.common.NamespaceNotFoundException)1 NotFoundException (co.cask.cdap.common.NotFoundException)1 RandomEndpointStrategy (co.cask.cdap.common.discovery.RandomEndpointStrategy)1 QueueName (co.cask.cdap.common.queue.QueueName)1 StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec)1 SystemDatasetInstantiator (co.cask.cdap.data.dataset.SystemDatasetInstantiator)1 SystemDatasetInstantiatorFactory (co.cask.cdap.data.dataset.SystemDatasetInstantiatorFactory)1 NoOpMetadataStore (co.cask.cdap.data2.metadata.store.NoOpMetadataStore)1