Search in sources :

Example 1 with DatasetInstanceTable

use of io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable in project cdap by caskdata.

the class DatasetTypeManager method addModule.

/**
 * Adds a dataset module to a namespace, or updates it if a module with the same id is already stored.
 *
 * <p>The module class is registered against a {@code DependencyTrackingRegistry} to collect the types it
 * defines and the types it uses. When an existing module is being replaced, the types that disappear from
 * it are checked against dependent modules and existing dataset instances before the new module metadata
 * is written.
 *
 * @param datasetModuleId the {@link DatasetModuleId} to add
 * @param className module class
 * @param jarLocation location of the module jar; if {@code null}, the module is assumed to be a system
 *                    module that is always present in the classpath and no jar is unpacked
 * @param force if true, an update will be allowed even if there are conflicts with other modules, or if
 *              removal of a type would break other modules' dependencies.
 * @throws DatasetModuleConflictException if the registry reports a type conflict, if the update removes
 *         types while other modules are recorded as using this module and {@code force} is false, or if
 *         dataset instances of a removed type still exist
 */
public void addModule(final DatasetModuleId datasetModuleId, final String className, final Location jarLocation, final boolean force) throws DatasetModuleConflictException {
    LOG.debug("adding module: {}, className: {}, jarLocation: {}", datasetModuleId, className, jarLocation == null ? "[local]" : jarLocation);
    try {
        TransactionRunners.run(transactionRunner, context -> {
            final DatasetTypeTable datasetTypeTable = DatasetTypeTable.create(context);
            final DatasetInstanceTable datasetInstanceTable = new DatasetInstanceTable(context);
            // 1. get existing module with all its types
            DatasetModuleMeta existing = datasetTypeTable.getModule(datasetModuleId);
            DependencyTrackingRegistry reg;
            // 2. unpack jar and create class loader
            ClassLoaderFolder classLoaderFolder = null;
            DirectoryClassLoader cl = null;
            try {
                // NOTE: if jarLocation is null, we assume that this is a system module, ie. always present in classpath
                if (jarLocation != null) {
                    classLoaderFolder = BundleJarUtil.prepareClassLoaderFolder(jarLocation, () -> Files.createTempDirectory(Files.createDirectories(systemTempPath), datasetModuleId.getEntityName()).toFile());
                    cl = new DirectoryClassLoader(classLoaderFolder.getDir(), cConf.get(Constants.AppFabric.PROGRAM_EXTRA_CLASSPATH), FilterClassLoader.create(getClass().getClassLoader()), "lib");
                }
                reg = new DependencyTrackingRegistry(datasetModuleId, datasetTypeTable, cl, force);
                // 3. register the new module while tracking dependencies.
                // this will fail if a type exists in a different module
                DatasetDefinitionRegistries.register(className, cl, reg);
            } catch (TypeConflictException e) {
                // type conflict from the registry, we want to throw that as is;
                // the outer handler converts it into a DatasetModuleConflictException
                throw e;
            } catch (Exception e) {
                // pass the exception as the trailing argument so the cause and stack trace are logged too
                LOG.error("Could not instantiate instance of dataset module class {} for module {} using jarLocation {}", className, datasetModuleId, jarLocation, e);
                throw Throwables.propagate(e);
            } finally {
                // Release the class loader and the unpacked jar folder; both are null when no jar was given
                Closeables.closeQuietly(cl);
                Closeables.closeQuietly(classLoaderFolder);
            }
            // 4. determine whether any type were removed from the module, and whether any other modules depend on them
            if (existing != null) {
                Set<String> removedTypes = new HashSet<>(existing.getTypes());
                removedTypes.removeAll(reg.getTypes());
                // TODO (CDAP-6294): track dependencies at the type level
                if (!force && !removedTypes.isEmpty() && !existing.getUsedByModules().isEmpty()) {
                    throw new DatasetModuleConflictException(String.format("Cannot update module '%s' to remove types %s: Modules %s may depend on it. Delete them first", datasetModuleId, removedTypes, existing.getUsedByModules()));
                }
                // this check is not bypassed by 'force': existing instances of a removed type always block the update
                Collection<DatasetSpecification> instances = datasetInstanceTable.getByTypes(datasetModuleId.getParent(), removedTypes);
                if (!instances.isEmpty()) {
                    throw new DatasetModuleConflictException(String.format("Attempt to remove dataset types %s from module '%s' that have existing instances: %s. " + "Delete them first.", removedTypes, datasetModuleId, instances.stream().map(input -> input.getName() + ":" + input.getType()).collect(Collectors.joining(", "))));
                }
            }
            // NOTE: we use set to avoid duplicated dependencies
            // NOTE: we use LinkedHashSet to preserve order in which dependencies must be loaded
            Set<String> moduleDependencies = new LinkedHashSet<>();
            for (DatasetTypeId usedType : reg.getUsedTypes()) {
                DatasetModuleMeta usedModule = datasetTypeTable.getModuleByType(usedType);
                if (usedModule == null) {
                    throw new IllegalStateException(String.format("Found a null used module for type %s while adding module %s", usedType, datasetModuleId));
                }
                // adding all used types and the module itself, in this very order to keep the order of loading modules
                // for instantiating a type
                moduleDependencies.addAll(usedModule.getUsesModules());
                boolean added = moduleDependencies.add(usedModule.getName());
                if (added) {
                    // also adding this module as a dependent for all modules it uses
                    usedModule.addUsedByModule(datasetModuleId.getEntityName());
                    datasetTypeTable.writeModule(usedType.getParent(), usedModule);
                }
            }
            URI jarURI = jarLocation == null ? null : jarLocation.toURI();
            // when updating, carry over the list of modules that already depend on this one
            DatasetModuleMeta moduleMeta = existing == null ? new DatasetModuleMeta(datasetModuleId.getEntityName(), className, jarURI, reg.getTypes(), Lists.newArrayList(moduleDependencies)) : new DatasetModuleMeta(datasetModuleId.getEntityName(), className, jarURI, reg.getTypes(), Lists.newArrayList(moduleDependencies), Lists.newArrayList(existing.getUsedByModules()));
            datasetTypeTable.writeModule(datasetModuleId.getParent(), moduleMeta);
        });
    } catch (RuntimeException e) {
        // conflicts raised inside the transaction body may arrive wrapped; search the causal chain for them
        for (Throwable cause : Throwables.getCausalChain(e)) {
            if (cause instanceof DatasetModuleConflictException) {
                throw (DatasetModuleConflictException) cause;
            } else if (cause instanceof TypeConflictException) {
                throw new DatasetModuleConflictException(cause.getMessage(), cause);
            }
        }
        throw Throwables.propagate(e);
    } catch (Exception e) {
        LOG.error("Operation failed", e);
        throw Throwables.propagate(e);
    }
}
Also used : TransactionRunners(io.cdap.cdap.spi.data.transaction.TransactionRunners) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) Location(org.apache.twill.filesystem.Location) DatasetTypeMeta(io.cdap.cdap.proto.DatasetTypeMeta) Inject(com.google.inject.Inject) LoggerFactory(org.slf4j.LoggerFactory) Callable(java.util.concurrent.Callable) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) InMemoryDatasetDefinitionRegistry(io.cdap.cdap.data2.dataset2.InMemoryDatasetDefinitionRegistry) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) Locations(io.cdap.cdap.common.io.Locations) Closeables(com.google.common.io.Closeables) DatasetTypeId(io.cdap.cdap.proto.id.DatasetTypeId) URI(java.net.URI) Path(java.nio.file.Path) LinkedHashSet(java.util.LinkedHashSet) Nullable(javax.annotation.Nullable) DatasetModuleId(io.cdap.cdap.proto.id.DatasetModuleId) ImmutableSet(com.google.common.collect.ImmutableSet) Logger(org.slf4j.Logger) Files(java.nio.file.Files) DatasetModuleMeta(io.cdap.cdap.proto.DatasetModuleMeta) Collection(java.util.Collection) Throwables(com.google.common.base.Throwables) DatasetDefinitionRegistry(io.cdap.cdap.api.dataset.module.DatasetDefinitionRegistry) Impersonator(io.cdap.cdap.security.impersonation.Impersonator) Set(java.util.Set) IOException(java.io.IOException) LocationFactory(org.apache.twill.filesystem.LocationFactory) TypeConflictException(io.cdap.cdap.data2.dataset2.TypeConflictException) Collectors(java.util.stream.Collectors) FilterClassLoader(io.cdap.cdap.common.lang.FilterClassLoader) List(java.util.List) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) Paths(java.nio.file.Paths) DatasetInstanceTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable) DirectoryClassLoader(io.cdap.cdap.common.lang.DirectoryClassLoader) BundleJarUtil(io.cdap.cdap.common.lang.jar.BundleJarUtil) 
DatasetDefinitionRegistries(io.cdap.cdap.data2.dataset2.DatasetDefinitionRegistries) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) Preconditions(com.google.common.base.Preconditions) Constants(io.cdap.cdap.common.conf.Constants) ClassLoaderFolder(io.cdap.cdap.common.lang.jar.ClassLoaderFolder) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DatasetDefinition(io.cdap.cdap.api.dataset.DatasetDefinition) DatasetTypeTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetTypeTable) LinkedHashSet(java.util.LinkedHashSet) DirectoryClassLoader(io.cdap.cdap.common.lang.DirectoryClassLoader) TypeConflictException(io.cdap.cdap.data2.dataset2.TypeConflictException) DatasetTypeId(io.cdap.cdap.proto.id.DatasetTypeId) DatasetTypeTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetTypeTable) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) ClassLoaderFolder(io.cdap.cdap.common.lang.jar.ClassLoaderFolder) URI(java.net.URI) IOException(java.io.IOException) TypeConflictException(io.cdap.cdap.data2.dataset2.TypeConflictException) DatasetInstanceTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable) DatasetModuleMeta(io.cdap.cdap.proto.DatasetModuleMeta) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 2 with DatasetInstanceTable

use of io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable in project cdap by caskdata.

the class DatasetTypeManager method deleteModule.

/**
 * Deletes the specified dataset module.
 *
 * <p>The module is only deleted when no other module is recorded as using it and no dataset instance of
 * any of its types exists. On deletion the module is also removed from the "usedBy" list of every module
 * it uses, and its jar archive is deleted if a jar path is recorded for it.
 *
 * @param datasetModuleId {@link DatasetModuleId} of the dataset module to delete
 * @return true if deleted successfully, false if module didn't exist: nothing to delete
 * @throws DatasetModuleConflictException when other modules or dataset instances depend on the specified
 *         module, in which case deletion does NOT happen
 */
public boolean deleteModule(final DatasetModuleId datasetModuleId) throws DatasetModuleConflictException {
    LOG.info("Deleting module {}", datasetModuleId);
    try {
        return TransactionRunners.run(transactionRunner, context -> {
            final DatasetTypeTable datasetTypeTable = DatasetTypeTable.create(context);
            final DatasetInstanceTable datasetInstanceTable = new DatasetInstanceTable(context);
            final DatasetModuleMeta module = datasetTypeTable.getModule(datasetModuleId);
            if (module == null) {
                return false;
            }
            // cannot delete when there's module that uses it
            if (!module.getUsedByModules().isEmpty()) {
                String msg = String.format("Cannot delete module %s: other modules depend on it. Delete them first", module);
                throw new DatasetModuleConflictException(msg);
            }
            Collection<DatasetSpecification> instances = datasetInstanceTable.getByTypes(datasetModuleId.getParent(), ImmutableSet.copyOf(module.getTypes()));
            // cannot delete when there's instance that uses it
            if (!instances.isEmpty()) {
                String msg = String.format("Cannot delete module %s: other instances depend on it. Delete them first", module);
                throw new DatasetModuleConflictException(msg);
            }
            // remove it from "usedBy" from other modules
            for (String usedModuleName : module.getUsesModules()) {
                DatasetModuleId usedModuleId = new DatasetModuleId(datasetModuleId.getNamespace(), usedModuleName);
                // not using getModuleWithFallback here because we want to know the namespace in which usedModule was found,
                // so we can overwrite it in the MDS in the appropriate namespace
                DatasetModuleMeta usedModule = datasetTypeTable.getModule(usedModuleId);
                // if the usedModule is not found in the current namespace, try finding it in the system namespace
                if (usedModule == null) {
                    usedModuleId = NamespaceId.SYSTEM.datasetModule(usedModuleName);
                    usedModule = datasetTypeTable.getModule(usedModuleId);
                    Preconditions.checkState(usedModule != null, "Could not find a module %s that the module %s uses.", usedModuleName, datasetModuleId.getEntityName());
                }
                usedModule.removeUsedByModule(datasetModuleId.getEntityName());
                datasetTypeTable.writeModule(usedModuleId.getParent(), usedModule);
            }
            datasetTypeTable.deleteModule(datasetModuleId);
            // Also delete module jar, but only if one is recorded.
            // NOTE(review): addModule stores a null jar URI for modules registered without a jar; this assumes
            // getJarLocationPath() returns null for those modules - confirm against DatasetModuleMeta.
            if (module.getJarLocationPath() != null) {
                try {
                    Location moduleJarLocation = impersonator.doAs(datasetModuleId, () -> Locations.getLocationFromAbsolutePath(locationFactory, module.getJarLocationPath()));
                    if (!moduleJarLocation.delete()) {
                        // a failed archive delete is only logged; it does not fail the module deletion
                        LOG.debug("Could not delete dataset module archive - {}", moduleJarLocation);
                    }
                } catch (Exception e) {
                    // the only checked exception the try-catch throws is IOException
                    Throwables.propagateIfInstanceOf(e, IOException.class);
                    throw Throwables.propagate(e);
                }
            }
            return true;
        });
    } catch (RuntimeException e) {
        // a conflict raised inside the transaction body may arrive wrapped; search the causal chain for it
        for (Throwable cause : Throwables.getCausalChain(e)) {
            if (cause instanceof DatasetModuleConflictException) {
                throw (DatasetModuleConflictException) cause;
            }
        }
        throw Throwables.propagate(e);
    } catch (Exception e) {
        LOG.error("Operation failed", e);
        throw Throwables.propagate(e);
    }
}
Also used : DatasetTypeTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetTypeTable) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) IOException(java.io.IOException) IOException(java.io.IOException) TypeConflictException(io.cdap.cdap.data2.dataset2.TypeConflictException) DatasetInstanceTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable) DatasetModuleId(io.cdap.cdap.proto.id.DatasetModuleId) DatasetModuleMeta(io.cdap.cdap.proto.DatasetModuleMeta) Location(org.apache.twill.filesystem.Location)

Example 3 with DatasetInstanceTable

use of io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable in project cdap by caskdata.

the class DatasetTypeManager method deleteModules.

/**
 * Deletes all modules in a namespace, other than system.
 * Presumes that the namespace has already been checked to be non-system.
 *
 * <p>Nothing is deleted if any dataset instance in the namespace uses a type defined by one of the
 * modules. After the module metadata is deleted, the recorded module jar archives are deleted as well;
 * an archive that cannot be deleted is only logged.
 *
 * @param namespaceId the {@link NamespaceId} to delete modules from.
 * @throws DatasetModuleConflictException if existing dataset instances depend on types of these modules
 * @throws IllegalArgumentException if {@code namespaceId} is null or is the system namespace
 */
public void deleteModules(final NamespaceId namespaceId) throws DatasetModuleConflictException {
    Preconditions.checkArgument(namespaceId != null && !NamespaceId.SYSTEM.equals(namespaceId), "Cannot delete modules from system namespace");
    LOG.info("Deleting all modules from namespace {}", namespaceId);
    try {
        TransactionRunners.run(transactionRunner, context -> {
            final DatasetTypeTable datasetTypeTable = DatasetTypeTable.create(context);
            final DatasetInstanceTable datasetInstanceTable = new DatasetInstanceTable(context);
            final Set<String> typesToDelete = new HashSet<>();
            final List<Location> moduleLocations = new ArrayList<>();
            final Collection<DatasetModuleMeta> modules = datasetTypeTable.getModules(namespaceId);
            try {
                // collect the types and the jar locations of all modules while impersonating for the namespace
                impersonator.doAs(namespaceId, () -> {
                    for (DatasetModuleMeta module : modules) {
                        typesToDelete.addAll(module.getTypes());
                        // NOTE(review): addModule stores a null jar URI for modules registered without a jar; this
                        // assumes getJarLocationPath() returns null for those modules - confirm against DatasetModuleMeta.
                        if (module.getJarLocationPath() != null) {
                            moduleLocations.add(Locations.getLocationFromAbsolutePath(locationFactory, module.getJarLocationPath()));
                        }
                    }
                    return null;
                });
            } catch (Exception e) {
                // the callable throws no checked exceptions
                throw Throwables.propagate(e);
            }
            // check if there are any instances that use types of these modules?
            Collection<DatasetSpecification> instances = datasetInstanceTable.getByTypes(namespaceId, typesToDelete);
            // cannot delete when there's instance that uses it
            if (!instances.isEmpty()) {
                throw new DatasetModuleConflictException("Cannot delete all modules: existing dataset instances depend on it. Delete them first");
            }
            datasetTypeTable.deleteModules(namespaceId);
            // Delete module locations
            for (Location moduleLocation : moduleLocations) {
                if (!moduleLocation.delete()) {
                    LOG.debug("Could not delete dataset module archive - {}", moduleLocation);
                }
            }
        });
    } catch (RuntimeException e) {
        // a conflict raised inside the transaction body may arrive wrapped; search the causal chain for it
        for (Throwable cause : Throwables.getCausalChain(e)) {
            if (cause instanceof DatasetModuleConflictException) {
                throw (DatasetModuleConflictException) cause;
            }
        }
        // pass the exception as the trailing argument so the cause and stack trace are logged too
        LOG.error("Failed to delete all modules from namespace {}", namespaceId, e);
        throw Throwables.propagate(e);
    } catch (Exception e) {
        LOG.error("Operation failed", e);
        throw Throwables.propagate(e);
    }
}
Also used : DatasetTypeTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetTypeTable) ArrayList(java.util.ArrayList) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) IOException(java.io.IOException) TypeConflictException(io.cdap.cdap.data2.dataset2.TypeConflictException) DatasetInstanceTable(io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable) DatasetModuleMeta(io.cdap.cdap.proto.DatasetModuleMeta) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Location(org.apache.twill.filesystem.Location)

Aggregations

DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification)3 DatasetInstanceTable (io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceTable)3 DatasetTypeTable (io.cdap.cdap.data2.datafabric.dataset.service.mds.DatasetTypeTable)3 TypeConflictException (io.cdap.cdap.data2.dataset2.TypeConflictException)3 DatasetModuleMeta (io.cdap.cdap.proto.DatasetModuleMeta)3 IOException (java.io.IOException)3 Location (org.apache.twill.filesystem.Location)3 DatasetModuleId (io.cdap.cdap.proto.id.DatasetModuleId)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 LinkedHashSet (java.util.LinkedHashSet)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 Throwables (com.google.common.base.Throwables)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Lists (com.google.common.collect.Lists)1 Closeables (com.google.common.io.Closeables)1 Inject (com.google.inject.Inject)1 DatasetDefinition (io.cdap.cdap.api.dataset.DatasetDefinition)1 DatasetDefinitionRegistry (io.cdap.cdap.api.dataset.module.DatasetDefinitionRegistry)1