Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.
The class DefaultMetadataStore, method setProperties.
/**
* Adds/updates metadata for the specified {@link NamespacedEntityId}.
*/
@Override
public void setProperties(final MetadataScope scope, final NamespacedEntityId namespacedEntityId,
                          final Map<String, String> properties) {
  final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>();
  execute(new TransactionExecutor.Procedure<MetadataDataset>() {
    @Override
    public void apply(MetadataDataset input) throws Exception {
      Map<String, String> existingProperties = input.getProperties(namespacedEntityId);
      Set<String> existingTags = input.getTags(namespacedEntityId);
      previousRef.set(new MetadataRecord(namespacedEntityId, scope, existingProperties, existingTags));
      for (Map.Entry<String, String> entry : properties.entrySet()) {
        input.setProperty(namespacedEntityId, entry.getKey(), entry.getValue());
      }
    }
  }, scope);
  final ImmutableMap.Builder<String, String> propAdditions = ImmutableMap.builder();
  final ImmutableMap.Builder<String, String> propDeletions = ImmutableMap.builder();
  MetadataRecord previousRecord = previousRef.get();
  // Iterating over the properties again, because we want to move the diff calculation
  // outside the transaction.
  for (Map.Entry<String, String> entry : properties.entrySet()) {
    String existingValue = previousRecord.getProperties().get(entry.getKey());
    if (existingValue != null && existingValue.equals(entry.getValue())) {
      // Value already exists and is the same as the value being passed. No update necessary.
      continue;
    }
    // If it is an update, then mark a single deletion.
    if (existingValue != null) {
      propDeletions.put(entry.getKey(), existingValue);
    }
    // In both the update and the new case, mark a single addition.
    propAdditions.put(entry.getKey(), entry.getValue());
  }
  publishAudit(previousRecord,
               new MetadataRecord(namespacedEntityId, scope, propAdditions.build(), EMPTY_TAGS),
               new MetadataRecord(namespacedEntityId, scope, propDeletions.build(), EMPTY_TAGS));
}
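For context, a call site might look like the following minimal sketch. The store variable, namespace, and property values are illustrative assumptions, not taken from the code above; DatasetId is one of the NamespacedEntityId subtypes this method accepts.

// A minimal usage sketch, assuming `store` is a configured MetadataStore
// implementation (for example this DefaultMetadataStore) and the dataset exists.
DatasetId dataset = new NamespaceId("default").dataset("purchases");
store.setProperties(MetadataScope.USER, dataset,
                    ImmutableMap.of("owner", "ops-team", "format", "avro"));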
Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.
The class DefaultMetadataStore, method removeMetadata.
/**
* Removes all metadata (including properties and tags) for the specified {@link NamespacedEntityId}.
*/
@Override
public void removeMetadata(final MetadataScope scope, final NamespacedEntityId namespacedEntityId) {
  final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>();
  execute(new TransactionExecutor.Procedure<MetadataDataset>() {
    @Override
    public void apply(MetadataDataset input) throws Exception {
      previousRef.set(new MetadataRecord(namespacedEntityId, scope,
                                         input.getProperties(namespacedEntityId),
                                         input.getTags(namespacedEntityId)));
      input.removeProperties(namespacedEntityId);
      input.removeTags(namespacedEntityId);
    }
  }, scope);
  MetadataRecord previous = previousRef.get();
  publishAudit(previous, new MetadataRecord(namespacedEntityId, scope), new MetadataRecord(previous));
}
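Note the three publishAudit arguments: the record before the change, the additions (an empty record here, since nothing is added), and the deletions (a copy of the entire previous record). A hypothetical call site, assuming `store` is a configured MetadataStore and the application exists:

// Illustrative only: remove all user-scope metadata from an application.
ApplicationId app = new NamespaceId("default").app("PurchaseApp");
store.removeMetadata(MetadataScope.USER, app);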
Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.
The class FileMetadataCleanerTest, method testScanAndDeleteOldMetadata.
@Test
public void testScanAndDeleteOldMetadata() throws Exception {
  // use the file meta data manager to write meta data in the old format,
  // use the file meta data writer to write meta data in the new format,
  // then scan for old files and make sure we only get the old meta data entries
  DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  DatasetManager datasetManager =
    new DefaultDatasetManager(datasetFramework, NamespaceId.SYSTEM,
                              co.cask.cdap.common.service.RetryStrategies.noRetry(), null);
  Transactional transactional = Transactions.createTransactionalWithRetry(
    Transactions.createTransactional(
      new MultiThreadDatasetCache(new SystemDatasetInstantiator(datasetFramework),
                                  injector.getInstance(TransactionSystemClient.class),
                                  NamespaceId.SYSTEM, ImmutableMap.<String, String>of(), null, null)),
    RetryStrategies.retryOnConflict(20, 100));
  FileMetaDataWriter fileMetaDataWriter = new FileMetaDataWriter(datasetManager, transactional);
  FileMetaDataManager fileMetaDataManager = injector.getInstance(FileMetaDataManager.class);
  LoggingContext flowContext =
    LoggingContextHelper.getLoggingContext("testNs", "testApp", "testFlow", ProgramType.FLOW);
  long eventTimestamp = System.currentTimeMillis();
  LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
  Location testLocation = locationFactory.create("testFile");
  try {
    // write 50 entries in the old format
    for (int i = 0; i < 50; i++) {
      fileMetaDataManager.writeMetaData(flowContext, eventTimestamp + i, testLocation);
    }
    LoggingContext wflowContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testWflow", ProgramType.WORKFLOW);
    fileMetaDataManager.writeMetaData(wflowContext, eventTimestamp, testLocation);
    LoggingContext mrContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testMR", ProgramType.MAPREDUCE);
    fileMetaDataManager.writeMetaData(mrContext, eventTimestamp, testLocation);
    LoggingContext sparkContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testSpark", ProgramType.SPARK);
    fileMetaDataManager.writeMetaData(sparkContext, eventTimestamp, testLocation);
    // write 50 entries in the new format
    long newEventTime = eventTimestamp + 1000;
    long currentTime = newEventTime + 1000;
    LogPathIdentifier logPathIdentifier = new LogPathIdentifier("testNs", "testApp", "testFlow");
    for (int i = 50; i < 100; i++) {
      fileMetaDataWriter.writeMetaData(logPathIdentifier, newEventTime + i, currentTime + i, testLocation);
    }
    FileMetaDataReader fileMetaDataReader = injector.getInstance(FileMetaDataReader.class);
    Assert.assertEquals(50, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(flowContext),
                                                         eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(1, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    FileMetadataCleaner fileMetadataCleaner = new FileMetadataCleaner(datasetManager, transactional);
    fileMetadataCleaner.scanAndDeleteOldMetaData(TRANSACTION_TIMEOUT, CUTOFF_TIME_TRANSACTION);
    // all old metadata has been deleted
    Assert.assertEquals(0, fileMetaDataReader.listFiles(logPathIdentifier,
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
    Assert.assertEquals(0, fileMetaDataReader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext),
                                                        eventTimestamp - 1, eventTimestamp + 100).size());
  } finally {
    // clean up metadata
    cleanupMetadata(transactional, datasetManager);
  }
}
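One detail worth calling out: every post-cleanup assertion queries the window [eventTimestamp - 1, eventTimestamp + 100], which ends well before the new-format entries written at newEventTime + i (eventTimestamp + 1050 and later). The 50 new-format entries fall outside every queried range, which appears to be why even the query by logPathIdentifier returns zero; the zero counts thus confirm the old-format entries were deleted without asserting anything about the surviving new-format ones.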
Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.
The class UpgradeTool, method createInjector.
@VisibleForTesting
Injector createInjector() throws Exception {
  return Guice.createInjector(
    new ConfigModule(cConf, hConf),
    new LocationRuntimeModule().getDistributedModules(),
    new ZKClientModule(),
    new DiscoveryRuntimeModule().getDistributedModules(),
    new MessagingClientModule(),
    Modules.override(new DataSetsModules().getDistributedModules()).with(new AbstractModule() {
      @Override
      protected void configure() {
        bind(DatasetFramework.class).to(InMemoryDatasetFramework.class).in(Scopes.SINGLETON);
        // DataSetsModules().getDistributedModules() binds to RemoteDatasetFramework, so override
        // that binding to the same InMemoryDatasetFramework
        bind(DatasetFramework.class)
          .annotatedWith(Names.named(DataSetsModules.BASE_DATASET_FRAMEWORK))
          .to(DatasetFramework.class);
        install(new FactoryModuleBuilder()
                  .implement(DatasetDefinitionRegistry.class, DefaultDatasetDefinitionRegistry.class)
                  .build(DatasetDefinitionRegistryFactory.class));
        // CDAP-5954: the upgrade tool does not need to record lineage and metadata changes for now
        bind(LineageWriter.class).to(NoOpLineageWriter.class);
      }
    }),
    new ViewAdminModules().getDistributedModules(),
    new StreamAdminModules().getDistributedModules(),
    new NotificationFeedClientModule(),
    new TwillModule(),
    new ExploreClientModule(),
    new ProgramRunnerRuntimeModule().getDistributedModules(),
    new ServiceStoreModules().getDistributedModules(),
    new SystemDatasetRuntimeModule().getDistributedModules(),
    // real notifications are not needed for the upgrade, so use the in-memory implementations
    new NotificationServiceRuntimeModule().getInMemoryModules(),
    new KafkaClientModule(),
    new NamespaceStoreModule().getDistributedModules(),
    new AuthenticationContextModules().getMasterModule(),
    new AuthorizationModule(),
    new AuthorizationEnforcementModule().getMasterModule(),
    new SecureStoreModules().getDistributedModules(),
    new DataFabricModules(UpgradeTool.class.getName()).getDistributedModules(),
    new AppFabricServiceRuntimeModule().getDistributedModules(),
    new AbstractModule() {
      @Override
      protected void configure() {
        // DataFabricDistributedModule needs a MetricsCollectionService binding; since the upgrade
        // tool does nothing with metrics, bind it to NoOpMetricsCollectionService
        bind(MetricsCollectionService.class).to(NoOpMetricsCollectionService.class).in(Scopes.SINGLETON);
        bind(MetricDatasetFactory.class).to(DefaultMetricDatasetFactory.class).in(Scopes.SINGLETON);
        bind(MetricStore.class).to(DefaultMetricStore.class);
      }

      @Provides
      @Singleton
      @Named("datasetInstanceManager")
      @SuppressWarnings("unused")
      public DatasetInstanceManager getDatasetInstanceManager(TransactionSystemClientService txClient,
                                                              TransactionExecutorFactory txExecutorFactory,
                                                              @Named("datasetMDS") DatasetFramework framework) {
        return new DatasetInstanceManager(txClient, txExecutorFactory, framework);
      }

      // This is needed because the LocalApplicationManager expects a DatasetFramework
      // injection named 'datasetMDS'
      @Provides
      @Singleton
      @Named("datasetMDS")
      @SuppressWarnings("unused")
      public DatasetFramework getInDsFramework(DatasetFramework dsFramework) {
        return dsFramework;
      }
    });
}
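A hypothetical caller, assuming cConf and hConf are already initialized on the tool instance (the variable names below are illustrative):

// Minimal sketch: build the injector, then pull out a service to use during the upgrade.
Injector injector = createInjector();
DatasetFramework dsFramework = injector.getInstance(DatasetFramework.class);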
Use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.
The class SystemMetadataWriterStage, method process.
@Override
public void process(ApplicationWithPrograms input) throws Exception {
  // add system metadata for the app
  ApplicationId appId = input.getApplicationId();
  ApplicationSpecification appSpec = input.getSpecification();
  // only update the creation time if this is a new app
  Map<String, String> properties = metadataStore.getProperties(MetadataScope.SYSTEM, appId);
  SystemMetadataWriter appSystemMetadataWriter =
    new AppSystemMetadataWriter(metadataStore, appId, appSpec, !properties.isEmpty());
  appSystemMetadataWriter.write();
  // add system metadata for the programs
  writeProgramSystemMetadata(appId, ProgramType.FLOW, appSpec.getFlows().values());
  writeProgramSystemMetadata(appId, ProgramType.MAPREDUCE, appSpec.getMapReduce().values());
  writeProgramSystemMetadata(appId, ProgramType.SERVICE, appSpec.getServices().values());
  writeProgramSystemMetadata(appId, ProgramType.SPARK, appSpec.getSpark().values());
  writeProgramSystemMetadata(appId, ProgramType.WORKER, appSpec.getWorkers().values());
  writeProgramSystemMetadata(appId, ProgramType.WORKFLOW, appSpec.getWorkflows().values());
  // emit the input to the next stage
  emit(input);
}
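The writeProgramSystemMetadata helper is not shown above. A plausible shape, offered as an illustrative assumption rather than the actual CDAP implementation, would iterate the specifications and write system metadata per program:

// Hypothetical sketch of the helper; the real method and the writer's
// constructor arguments may differ from this assumption.
private void writeProgramSystemMetadata(ApplicationId appId, ProgramType programType,
                                        Iterable<? extends ProgramSpecification> specs) {
  for (ProgramSpecification spec : specs) {
    ProgramId programId = appId.program(programType, spec.getName());
    new ProgramSystemMetadataWriter(metadataStore, programId, spec).write();
  }
}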