Example 31 with Metadata

Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

The class DefaultMetadataStore, method setProperties.

/**
 * Adds/updates metadata for the specified {@link NamespacedEntityId}.
 */
@Override
public void setProperties(final MetadataScope scope, final NamespacedEntityId namespacedEntityId, final Map<String, String> properties) {
    final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>();
    execute(new TransactionExecutor.Procedure<MetadataDataset>() {

        @Override
        public void apply(MetadataDataset input) throws Exception {
            Map<String, String> existingProperties = input.getProperties(namespacedEntityId);
            Set<String> existingTags = input.getTags(namespacedEntityId);
            previousRef.set(new MetadataRecord(namespacedEntityId, scope, existingProperties, existingTags));
            for (Map.Entry<String, String> entry : properties.entrySet()) {
                input.setProperty(namespacedEntityId, entry.getKey(), entry.getValue());
            }
        }
    }, scope);
    final ImmutableMap.Builder<String, String> propAdditions = ImmutableMap.builder();
    final ImmutableMap.Builder<String, String> propDeletions = ImmutableMap.builder();
    MetadataRecord previousRecord = previousRef.get();
    // Iterate over the properties again here so that the diff calculation happens outside the transaction.
    for (Map.Entry<String, String> entry : properties.entrySet()) {
        String existingValue = previousRecord.getProperties().get(entry.getKey());
        if (existingValue != null && existingValue.equals(entry.getValue())) {
            // Value already exists and is the same as the value being passed. No update necessary.
            continue;
        }
        // If it is an update, then mark a single deletion.
        if (existingValue != null) {
            propDeletions.put(entry.getKey(), existingValue);
        }
        // In both the update and the new-property case, mark a single addition.
        propAdditions.put(entry.getKey(), entry.getValue());
    }
    publishAudit(previousRecord, new MetadataRecord(namespacedEntityId, scope, propAdditions.build(), EMPTY_TAGS), new MetadataRecord(namespacedEntityId, scope, propDeletions.build(), EMPTY_TAGS));
}
Also used : MetadataDataset(co.cask.cdap.data2.metadata.dataset.MetadataDataset) EnumSet(java.util.EnumSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionExecutor(org.apache.tephra.TransactionExecutor) BadRequestException(co.cask.cdap.common.BadRequestException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) MetadataRecord(co.cask.cdap.common.metadata.MetadataRecord) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
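
A minimal caller sketch for this method, assuming a DefaultMetadataStore instance named metadataStore is already available through the usual Guice wiring; the entity id and property values below are purely illustrative:

// Hypothetical usage: set user-scope properties on a dataset.
NamespacedEntityId entity = new DatasetId("myNamespace", "purchases");
Map<String, String> properties = new HashMap<>();
properties.put("owner", "ops-team");
properties.put("retention.days", "30");
// Values that already exist unchanged are skipped; changed values show up in the
// audit record as a deletion of the old value plus an addition of the new one.
metadataStore.setProperties(MetadataScope.USER, entity, properties);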

Example 32 with Metadata

Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

The class DefaultMetadataStore, method removeMetadata.

/**
 * Removes all metadata (including properties and tags) for the specified {@link NamespacedEntityId}.
 */
@Override
public void removeMetadata(final MetadataScope scope, final NamespacedEntityId namespacedEntityId) {
    final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>();
    execute(new TransactionExecutor.Procedure<MetadataDataset>() {

        @Override
        public void apply(MetadataDataset input) throws Exception {
            previousRef.set(new MetadataRecord(namespacedEntityId, scope, input.getProperties(namespacedEntityId), input.getTags(namespacedEntityId)));
            input.removeProperties(namespacedEntityId);
            input.removeTags(namespacedEntityId);
        }
    }, scope);
    MetadataRecord previous = previousRef.get();
    publishAudit(previous, new MetadataRecord(namespacedEntityId, scope), new MetadataRecord(previous));
}
Also used : MetadataDataset(co.cask.cdap.data2.metadata.dataset.MetadataDataset) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionExecutor(org.apache.tephra.TransactionExecutor) MetadataRecord(co.cask.cdap.common.metadata.MetadataRecord) BadRequestException(co.cask.cdap.common.BadRequestException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException)
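
The same kind of sketch for removeMetadata, with metadataStore and the dataset id assumed as above:

// Hypothetical usage: wipe all user-scope properties and tags for a dataset,
// for example just before the dataset itself is dropped. The record captured
// inside the transaction becomes the deletion payload of the published audit entry.
metadataStore.removeMetadata(MetadataScope.USER, new DatasetId("myNamespace", "purchases"));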

Example 33 with Metadata

Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

The class FileMetadataCleanerTest, method testScanAndDeleteOldMetadata.

@Test
public void testScanAndDeleteOldMetadata() throws Exception {
    // Use FileMetaDataManager to write metadata in the old format and
    // FileMetaDataWriter to write metadata in the new format, then run the
    // cleaner and verify that only the old-format entries get deleted.
    DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    DatasetManager datasetManager = new DefaultDatasetManager(datasetFramework, NamespaceId.SYSTEM, co.cask.cdap.common.service.RetryStrategies.noRetry(), null);
    Transactional transactional = Transactions.createTransactionalWithRetry(Transactions.createTransactional(new MultiThreadDatasetCache(new SystemDatasetInstantiator(datasetFramework), injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM, ImmutableMap.<String, String>of(), null, null)), RetryStrategies.retryOnConflict(20, 100));
    FileMetaDataWriter fileMetaDataWriter = new FileMetaDataWriter(datasetManager, transactional);
    FileMetaDataManager fileMetaDataManager = injector.getInstance(FileMetaDataManager.class);
    LoggingContext flowContext = LoggingContextHelper.getLoggingContext("testNs", "testApp", "testFlow", ProgramType.FLOW);
    long eventTimestamp = System.currentTimeMillis();
    LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
    Location testLocation = locationFactory.create("testFile");
    try {
        // write 50 entries in old format
        for (int i = 0; i < 50; i++) {
            fileMetaDataManager.writeMetaData(flowContext, eventTimestamp + i, testLocation);
        }
        LoggingContext wflowContext = LoggingContextHelper.getLoggingContext("testNs", "testApp", "testWflow", ProgramType.WORKFLOW);
        fileMetaDataManager.writeMetaData(wflowContext, eventTimestamp, testLocation);
        LoggingContext mrContext = LoggingContextHelper.getLoggingContext("testNs", "testApp", "testMR", ProgramType.MAPREDUCE);
        fileMetaDataManager.writeMetaData(mrContext, eventTimestamp, testLocation);
        LoggingContext sparkContext = LoggingContextHelper.getLoggingContext("testNs", "testApp", "testSpark", ProgramType.SPARK);
        fileMetaDataManager.writeMetaData(sparkContext, eventTimestamp, testLocation);
        // write 50 entries in new format
        long newEventTime = eventTimestamp + 1000;
        long currentTime = newEventTime + 1000;
        LogPathIdentifier logPathIdentifier = new LogPathIdentifier("testNs", "testApp", "testFlow");
        for (int i = 50; i < 100; i++) {
            fileMetaDataWriter.writeMetaData(logPathIdentifier, newEventTime + i, currentTime + i, testLocation);
        }
        FileMetaDataReader fileMetaDataReader = injector.getInstance(FileMetaDataReader.class);
        Assert.assertEquals(50, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(flowContext), eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext), eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext), eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext), eventTimestamp - 1, eventTimestamp + 100).size());
        FileMetadataCleaner fileMetadataCleaner = new FileMetadataCleaner(datasetManager, transactional);
        fileMetadataCleaner.scanAndDeleteOldMetaData(TRANSACTION_TIMEOUT, CUTOFF_TIME_TRANSACTION);
        // All old-format metadata should now have been deleted.
        Assert.assertEquals(0, fileMetaDataReader.listFiles(logPathIdentifier, eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext), eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext), eventTimestamp - 1, eventTimestamp + 100).size());
        Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext), eventTimestamp - 1, eventTimestamp + 100).size());
    } finally {
        // cleanup meta
        cleanupMetadata(transactional, datasetManager);
    }
}
Also used : FileMetaDataWriter(co.cask.cdap.logging.meta.FileMetaDataWriter) LoggingContext(co.cask.cdap.common.logging.LoggingContext) DefaultDatasetManager(co.cask.cdap.data2.datafabric.dataset.DefaultDatasetManager) DatasetManager(co.cask.cdap.api.dataset.DatasetManager) DefaultDatasetManager(co.cask.cdap.data2.datafabric.dataset.DefaultDatasetManager) LocationFactory(org.apache.twill.filesystem.LocationFactory) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) MultiThreadDatasetCache(co.cask.cdap.data2.dataset2.MultiThreadDatasetCache) SystemDatasetInstantiator(co.cask.cdap.data.dataset.SystemDatasetInstantiator) FileMetaDataManager(co.cask.cdap.logging.write.FileMetaDataManager) LogPathIdentifier(co.cask.cdap.logging.appender.system.LogPathIdentifier) FileMetaDataReader(co.cask.cdap.logging.meta.FileMetaDataReader) Transactional(co.cask.cdap.api.Transactional) Location(org.apache.twill.filesystem.Location) LogLocation(co.cask.cdap.logging.write.LogLocation) Test(org.junit.Test)
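
The cleanup step at the heart of this test, isolated as a sketch; datasetManager and transactional are assumed to be built exactly as in the test above, and the two constants come from the test class:

// Assumed: datasetManager and transactional are set up as in the test above.
FileMetadataCleaner fileMetadataCleaner = new FileMetadataCleaner(datasetManager, transactional);
// Scan the log-meta table and delete every entry written in the old (pre-LogPathIdentifier)
// format, bounded by the test's transaction timeout and cutoff-time constants.
fileMetadataCleaner.scanAndDeleteOldMetaData(TRANSACTION_TIMEOUT, CUTOFF_TIME_TRANSACTION);
// Entries written in the new format through FileMetaDataWriter are not targeted by this scan.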

Example 34 with Metadata

Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

The class UpgradeTool, method createInjector.

@VisibleForTesting
Injector createInjector() throws Exception {
    return Guice.createInjector(new ConfigModule(cConf, hConf), new LocationRuntimeModule().getDistributedModules(), new ZKClientModule(), new DiscoveryRuntimeModule().getDistributedModules(), new MessagingClientModule(), Modules.override(new DataSetsModules().getDistributedModules()).with(new AbstractModule() {

        @Override
        protected void configure() {
            bind(DatasetFramework.class).to(InMemoryDatasetFramework.class).in(Scopes.SINGLETON);
            // DataSetsModules().getDistributedModules() binds the named DatasetFramework to
            // RemoteDatasetFramework, so override that binding to the same InMemoryDatasetFramework.
            bind(DatasetFramework.class).annotatedWith(Names.named(DataSetsModules.BASE_DATASET_FRAMEWORK)).to(DatasetFramework.class);
            install(new FactoryModuleBuilder().implement(DatasetDefinitionRegistry.class, DefaultDatasetDefinitionRegistry.class).build(DatasetDefinitionRegistryFactory.class));
            // CDAP-5954 Upgrade tool does not need to record lineage and metadata changes for now.
            bind(LineageWriter.class).to(NoOpLineageWriter.class);
        }
    }), new ViewAdminModules().getDistributedModules(), new StreamAdminModules().getDistributedModules(), new NotificationFeedClientModule(), new TwillModule(), new ExploreClientModule(), new ProgramRunnerRuntimeModule().getDistributedModules(), new ServiceStoreModules().getDistributedModules(), new SystemDatasetRuntimeModule().getDistributedModules(), // don't need real notifications for upgrade, so use the in-memory implementations
    new NotificationServiceRuntimeModule().getInMemoryModules(), new KafkaClientModule(), new NamespaceStoreModule().getDistributedModules(), new AuthenticationContextModules().getMasterModule(), new AuthorizationModule(), new AuthorizationEnforcementModule().getMasterModule(), new SecureStoreModules().getDistributedModules(), new DataFabricModules(UpgradeTool.class.getName()).getDistributedModules(), new AppFabricServiceRuntimeModule().getDistributedModules(), new AbstractModule() {

        @Override
        protected void configure() {
            // DataFabricDistributedModule needs a MetricsCollectionService binding. The upgrade tool
            // does not emit metrics, so bind it to NoOpMetricsCollectionService.
            bind(MetricsCollectionService.class).to(NoOpMetricsCollectionService.class).in(Scopes.SINGLETON);
            bind(MetricDatasetFactory.class).to(DefaultMetricDatasetFactory.class).in(Scopes.SINGLETON);
            bind(MetricStore.class).to(DefaultMetricStore.class);
        }

        @Provides
        @Singleton
        @Named("datasetInstanceManager")
        @SuppressWarnings("unused")
        public DatasetInstanceManager getDatasetInstanceManager(TransactionSystemClientService txClient, TransactionExecutorFactory txExecutorFactory, @Named("datasetMDS") DatasetFramework framework) {
            return new DatasetInstanceManager(txClient, txExecutorFactory, framework);
        }

        // This is needed because the LocalApplicationManager
        // expects a dsframework injection named datasetMDS
        @Provides
        @Singleton
        @Named("datasetMDS")
        @SuppressWarnings("unused")
        public DatasetFramework getInDsFramework(DatasetFramework dsFramework) {
            return dsFramework;
        }
    });
}
Also used : MessagingClientModule(co.cask.cdap.messaging.guice.MessagingClientModule) ConfigModule(co.cask.cdap.common.guice.ConfigModule) FactoryModuleBuilder(com.google.inject.assistedinject.FactoryModuleBuilder) NamespaceStoreModule(co.cask.cdap.store.guice.NamespaceStoreModule) NotificationServiceRuntimeModule(co.cask.cdap.notifications.guice.NotificationServiceRuntimeModule) ViewAdminModules(co.cask.cdap.data.view.ViewAdminModules) TransactionExecutorFactory(co.cask.cdap.data2.transaction.TransactionExecutorFactory) MetricDatasetFactory(co.cask.cdap.metrics.store.MetricDatasetFactory) DefaultMetricDatasetFactory(co.cask.cdap.metrics.store.DefaultMetricDatasetFactory) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) InMemoryDatasetFramework(co.cask.cdap.data2.dataset2.InMemoryDatasetFramework) ZKClientModule(co.cask.cdap.common.guice.ZKClientModule) DatasetDefinitionRegistryFactory(co.cask.cdap.data2.dataset2.DatasetDefinitionRegistryFactory) KafkaClientModule(co.cask.cdap.common.guice.KafkaClientModule) TransactionSystemClientService(co.cask.cdap.data2.transaction.TransactionSystemClientService) SystemDatasetRuntimeModule(co.cask.cdap.data.runtime.SystemDatasetRuntimeModule) DiscoveryRuntimeModule(co.cask.cdap.common.guice.DiscoveryRuntimeModule) AuthorizationModule(co.cask.cdap.app.guice.AuthorizationModule) InMemoryDatasetFramework(co.cask.cdap.data2.dataset2.InMemoryDatasetFramework) Named(com.google.inject.name.Named) TwillModule(co.cask.cdap.app.guice.TwillModule) DatasetInstanceManager(co.cask.cdap.data2.datafabric.dataset.instance.DatasetInstanceManager) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) NoOpMetricsCollectionService(co.cask.cdap.common.metrics.NoOpMetricsCollectionService) AuthenticationContextModules(co.cask.cdap.security.auth.context.AuthenticationContextModules) DataSetsModules(co.cask.cdap.data.runtime.DataSetsModules) SecureStoreModules(co.cask.cdap.security.guice.SecureStoreModules) LocationRuntimeModule(co.cask.cdap.common.guice.LocationRuntimeModule) DefaultMetricStore(co.cask.cdap.metrics.store.DefaultMetricStore) Provides(com.google.inject.Provides) AbstractModule(com.google.inject.AbstractModule) StreamAdminModules(co.cask.cdap.data.stream.StreamAdminModules) ProgramRunnerRuntimeModule(co.cask.cdap.app.guice.ProgramRunnerRuntimeModule) LineageWriter(co.cask.cdap.data2.metadata.writer.LineageWriter) NoOpLineageWriter(co.cask.cdap.data2.metadata.writer.NoOpLineageWriter) ExploreClientModule(co.cask.cdap.explore.guice.ExploreClientModule) Singleton(com.google.inject.Singleton) NotificationFeedClientModule(co.cask.cdap.notifications.feeds.client.NotificationFeedClientModule) DataFabricModules(co.cask.cdap.data.runtime.DataFabricModules) ServiceStoreModules(co.cask.cdap.app.guice.ServiceStoreModules) AuthorizationEnforcementModule(co.cask.cdap.security.authorization.AuthorizationEnforcementModule) AppFabricServiceRuntimeModule(co.cask.cdap.app.guice.AppFabricServiceRuntimeModule) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
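
A hedged sketch of how the tool might consume this injector; the variable names and call sites are illustrative, not the actual UpgradeTool flow:

// Illustrative only: obtain upgrade-time services from the injector built above
// (createInjector declares throws Exception, so callers handle or propagate it).
Injector injector = createInjector();
// Because of the Modules.override above, this resolves to InMemoryDatasetFramework
// rather than the RemoteDatasetFramework the distributed modules would normally bind.
DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
// The @Provides method above exposes the instance manager under a named binding.
DatasetInstanceManager instanceManager =
    injector.getInstance(Key.get(DatasetInstanceManager.class, Names.named("datasetInstanceManager")));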

Example 35 with Metadata

Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

The class SystemMetadataWriterStage, method process.

@Override
public void process(ApplicationWithPrograms input) throws Exception {
    // add system metadata for apps
    ApplicationId appId = input.getApplicationId();
    ApplicationSpecification appSpec = input.getSpecification();
    // only update creation time if this is a new app
    Map<String, String> properties = metadataStore.getProperties(MetadataScope.SYSTEM, appId);
    SystemMetadataWriter appSystemMetadataWriter = new AppSystemMetadataWriter(metadataStore, appId, appSpec, !properties.isEmpty());
    appSystemMetadataWriter.write();
    // add system metadata for programs
    writeProgramSystemMetadata(appId, ProgramType.FLOW, appSpec.getFlows().values());
    writeProgramSystemMetadata(appId, ProgramType.MAPREDUCE, appSpec.getMapReduce().values());
    writeProgramSystemMetadata(appId, ProgramType.SERVICE, appSpec.getServices().values());
    writeProgramSystemMetadata(appId, ProgramType.SPARK, appSpec.getSpark().values());
    writeProgramSystemMetadata(appId, ProgramType.WORKER, appSpec.getWorkers().values());
    writeProgramSystemMetadata(appId, ProgramType.WORKFLOW, appSpec.getWorkflows().values());
    // Emit input to the next stage
    emit(input);
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) SystemMetadataWriter(co.cask.cdap.data2.metadata.system.SystemMetadataWriter) AppSystemMetadataWriter(co.cask.cdap.data2.metadata.system.AppSystemMetadataWriter) ProgramSystemMetadataWriter(co.cask.cdap.data2.metadata.system.ProgramSystemMetadataWriter) ApplicationId(co.cask.cdap.proto.id.ApplicationId) AppSystemMetadataWriter(co.cask.cdap.data2.metadata.system.AppSystemMetadataWriter)
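
A hypothetical reconstruction of the writeProgramSystemMetadata helper that the calls above rely on; the real helper in cdap may have a different signature, and the ProgramSystemMetadataWriter constructor arguments shown here are assumptions:

// Hypothetical sketch only; not the actual cdap implementation.
private void writeProgramSystemMetadata(ApplicationId appId, ProgramType programType,
                                        Iterable<? extends ProgramSpecification> specs) {
    for (ProgramSpecification spec : specs) {
        ProgramId programId = appId.program(programType, spec.getName());
        // Constructor arguments are assumed; the writer presumably needs the store, the program id and its spec.
        new ProgramSystemMetadataWriter(metadataStore, programId, spec).write();
    }
}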

Aggregations

IOException (java.io.IOException): 9
BadRequestException (co.cask.cdap.common.BadRequestException): 8
MetadataRecord (co.cask.cdap.common.metadata.MetadataRecord): 7
MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 7
Row (co.cask.cdap.api.dataset.table.Row): 6
Scanner (co.cask.cdap.api.dataset.table.Scanner): 5
DatasetId (co.cask.cdap.proto.id.DatasetId): 5
NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 5
Test (org.junit.Test): 5
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 4
NotFoundException (co.cask.cdap.common.NotFoundException): 4
MetadataDataset (co.cask.cdap.data2.metadata.dataset.MetadataDataset): 4
Lineage (co.cask.cdap.data2.metadata.lineage.Lineage): 4
Relation (co.cask.cdap.data2.metadata.lineage.Relation): 4
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 4
HashMap (java.util.HashMap): 4
ConfigModule (co.cask.cdap.common.guice.ConfigModule): 3
LocationRuntimeModule (co.cask.cdap.common.guice.LocationRuntimeModule): 3
DataSetsModules (co.cask.cdap.data.runtime.DataSetsModules): 3
SystemDatasetRuntimeModule (co.cask.cdap.data.runtime.SystemDatasetRuntimeModule): 3