Search in sources :

Example 76 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class DatasetTypeManager method deleteModules.

/**
   * Deletes all dataset modules registered in the given (non-system) namespace.
   *
   * <p>Within a single transaction this collects the types and jar locations of every module in the
   * namespace, refuses to proceed if any dataset instance in the namespace still uses one of those
   * types, removes the module metadata, and finally deletes the module jars. A jar whose
   * {@code delete()} returns {@code false} is only logged at debug level and does not fail the operation.
   *
   * @param namespaceId the {@link NamespaceId} to delete modules from; must not be null or the system namespace
   * @throws DatasetModuleConflictException if dataset instances in the namespace still depend on types
   *         of the modules, in which case nothing is deleted
   * @throws IllegalArgumentException if {@code namespaceId} is null or is the system namespace
   */
public void deleteModules(final NamespaceId namespaceId) throws DatasetModuleConflictException {
    // Checked separately so that a null id is not reported as "system namespace".
    Preconditions.checkArgument(namespaceId != null, "Namespace id must not be null");
    Preconditions.checkArgument(!NamespaceId.SYSTEM.equals(namespaceId), "Cannot delete modules from system namespace");
    LOG.warn("Deleting all modules from namespace {}", namespaceId);
    try {
        final DatasetTypeMDS datasetTypeMDS = datasetCache.getDataset(DatasetMetaTableUtil.META_TABLE_NAME);
        final DatasetInstanceMDS datasetInstanceMDS = datasetCache.getDataset(DatasetMetaTableUtil.INSTANCE_TABLE_NAME);
        txExecutorFactory.createExecutor(datasetCache).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws DatasetModuleConflictException, IOException {
                final Set<String> typesToDelete = new HashSet<>();
                final List<Location> moduleLocations = new ArrayList<>();
                final Collection<DatasetModuleMeta> modules = datasetTypeMDS.getModules(namespaceId);
                try {
                    // Resolve the jar locations while impersonating the namespace.
                    impersonator.doAs(namespaceId, new Callable<Void>() {

                        @Override
                        public Void call() throws Exception {
                            for (DatasetModuleMeta module : modules) {
                                typesToDelete.addAll(module.getTypes());
                                moduleLocations.add(Locations.getLocationFromAbsolutePath(locationFactory, module.getJarLocationPath()));
                            }
                            return null;
                        }
                    });
                } catch (Exception e) {
                    // the callable throws no checked exceptions
                    throw Throwables.propagate(e);
                }
                // check if there are any instances that use types of these modules
                Collection<DatasetSpecification> instances = datasetInstanceMDS.getByTypes(namespaceId, typesToDelete);
                // cannot delete when there's instance that uses it
                if (!instances.isEmpty()) {
                    throw new DatasetModuleConflictException("Cannot delete all modules: existing dataset instances depend on it. Delete them first");
                }
                datasetTypeMDS.deleteModules(namespaceId);
                // Delete module locations; a false return is tolerated and only logged
                for (Location moduleLocation : moduleLocations) {
                    if (!moduleLocation.delete()) {
                        LOG.debug("Could not delete dataset module archive - {}", moduleLocation);
                    }
                }
            }
        });
    } catch (TransactionFailureException e) {
        // Surface the conflict raised inside the transaction as the declared checked exception.
        if (e.getCause() instanceof DatasetModuleConflictException) {
            throw (DatasetModuleConflictException) e.getCause();
        }
        // Pass the exception to the logger so the cause and stack trace are not lost.
        LOG.error("Failed to delete all modules from namespace {}", namespaceId, e);
        throw Throwables.propagate(e);
    } catch (Exception e) {
        LOG.error("Operation failed", e);
        throw Throwables.propagate(e);
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) DatasetInstanceMDS(co.cask.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceMDS) TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) TransactionFailureException(org.apache.tephra.TransactionFailureException) TypeConflictException(co.cask.cdap.data2.dataset2.TypeConflictException) IOException(java.io.IOException) DatasetTypeMDS(co.cask.cdap.data2.datafabric.dataset.service.mds.DatasetTypeMDS) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetModuleMeta(co.cask.cdap.proto.DatasetModuleMeta) Collection(java.util.Collection) List(java.util.List) ArrayList(java.util.ArrayList) Location(org.apache.twill.filesystem.Location)

Example 77 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class DatasetTypeManager method deleteModule.

/**
   * Deletes the specified dataset module.
   *
   * <p>Within a single transaction this verifies that no other module and no dataset instance depends
   * on the module, removes the module from the "usedBy" list of every module it uses (looking each one
   * up in the module's own namespace first and in the system namespace otherwise), deletes the module
   * metadata and finally deletes the module jar. A jar whose {@code delete()} returns {@code false} is
   * only logged at debug level.
   *
   * @param datasetModuleId {@link DatasetModuleId} of the dataset module to delete
   * @return true if deleted successfully, false if module didn't exist: nothing to delete
   * @throws DatasetModuleConflictException when other modules or dataset instances depend on the
   *         specified one, in which case deletion does NOT happen
   */
public boolean deleteModule(final DatasetModuleId datasetModuleId) throws DatasetModuleConflictException {
    LOG.info("Deleting module {}", datasetModuleId);
    try {
        final DatasetTypeMDS datasetTypeMDS = datasetCache.getDataset(DatasetMetaTableUtil.META_TABLE_NAME);
        final DatasetInstanceMDS datasetInstanceMDS = datasetCache.getDataset(DatasetMetaTableUtil.INSTANCE_TABLE_NAME);
        return txExecutorFactory.createExecutor(datasetCache).execute(new Callable<Boolean>() {

            @Override
            public Boolean call() throws DatasetModuleConflictException, IOException {
                final DatasetModuleMeta module = datasetTypeMDS.getModule(datasetModuleId);
                if (module == null) {
                    return false;
                }
                // cannot delete when there's module that uses it
                if (module.getUsedByModules().size() > 0) {
                    String msg = String.format("Cannot delete module %s: other modules depend on it. Delete them first", module);
                    throw new DatasetModuleConflictException(msg);
                }
                Collection<DatasetSpecification> instances = datasetInstanceMDS.getByTypes(datasetModuleId.getParent(), ImmutableSet.copyOf(module.getTypes()));
                // cannot delete when there's instance that uses it
                if (!instances.isEmpty()) {
                    String msg = String.format("Cannot delete module %s: other instances depend on it. Delete them first", module);
                    throw new DatasetModuleConflictException(msg);
                }
                // remove it from "usedBy" from other modules
                for (String usedModuleName : module.getUsesModules()) {
                    DatasetModuleId usedModuleId = new DatasetModuleId(datasetModuleId.getNamespace(), usedModuleName);
                    // not using getModuleWithFallback here because we want to know the namespace in which usedModule was found,
                    // so we can overwrite it in the MDS in the appropriate namespace
                    DatasetModuleMeta usedModule = datasetTypeMDS.getModule(usedModuleId);
                    // if the usedModule is not found in the current namespace, try finding it in the system namespace
                    if (usedModule == null) {
                        usedModuleId = NamespaceId.SYSTEM.datasetModule(usedModuleName);
                        usedModule = datasetTypeMDS.getModule(usedModuleId);
                        Preconditions.checkState(usedModule != null, "Could not find a module %s that the module %s uses.", usedModuleName, datasetModuleId.getEntityName());
                    }
                    usedModule.removeUsedByModule(datasetModuleId.getEntityName());
                    datasetTypeMDS.writeModule(usedModuleId.getParent(), usedModule);
                }
                datasetTypeMDS.deleteModule(datasetModuleId);
                try {
                    // Also delete module jar; the location is resolved while impersonating the module's owner
                    Location moduleJarLocation = impersonator.doAs(datasetModuleId, new Callable<Location>() {

                        @Override
                        public Location call() throws Exception {
                            return Locations.getLocationFromAbsolutePath(locationFactory, module.getJarLocationPath());
                        }
                    });
                    if (!moduleJarLocation.delete()) {
                        // Name the archive so the leftover file can be found, as deleteModules does.
                        LOG.debug("Could not delete dataset module archive - {}", moduleJarLocation);
                    }
                } catch (Exception e) {
                    // the only checked exception the try-catch throws is IOException
                    Throwables.propagateIfInstanceOf(e, IOException.class);
                    throw Throwables.propagate(e);
                }
                return true;
            }
        });
    } catch (TransactionFailureException e) {
        // Surface the conflict raised inside the transaction as the declared checked exception.
        if (e.getCause() instanceof DatasetModuleConflictException) {
            throw (DatasetModuleConflictException) e.getCause();
        }
        // Log with the exception attached so a failed transaction is not silent here.
        LOG.error("Failed to delete module {}", datasetModuleId, e);
        throw Throwables.propagate(e);
    } catch (Exception e) {
        LOG.error("Operation failed", e);
        throw Throwables.propagate(e);
    }
}
Also used : DatasetInstanceMDS(co.cask.cdap.data2.datafabric.dataset.service.mds.DatasetInstanceMDS) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) TransactionFailureException(org.apache.tephra.TransactionFailureException) TypeConflictException(co.cask.cdap.data2.dataset2.TypeConflictException) IOException(java.io.IOException) DatasetTypeMDS(co.cask.cdap.data2.datafabric.dataset.service.mds.DatasetTypeMDS) DatasetModuleId(co.cask.cdap.proto.id.DatasetModuleId) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetModuleMeta(co.cask.cdap.proto.DatasetModuleMeta) Collection(java.util.Collection) Location(org.apache.twill.filesystem.Location)

Example 78 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class PartitionedFileSetTest method testRollbackOfPartitionCreateThenDelete.

/**
 * Creates a partition and its file, drops the partition in the same transaction, then aborts and
 * checks that the file is gone.
 */
@Test
public void testRollbackOfPartitionCreateThenDelete() throws Exception {
    PartitionedFileSet fileSet = dsFrameworkUtil.getInstance(pfsInstance);
    TransactionContext tx = new TransactionContext(txClient, (TransactionAware) fileSet);

    tx.start();
    // the partition is expected to be absent before this transaction creates it
    Assert.assertNull(fileSet.getPartition(PARTITION_KEY));

    PartitionOutput newPartition = fileSet.getPartitionOutput(PARTITION_KEY);
    Location dataFile = newPartition.getLocation().append("file");
    Assert.assertFalse(dataFile.exists());

    // creating the stream creates the file; a single byte (1) is written to it
    try (OutputStream out = dataFile.getOutputStream()) {
        out.write(1);
    }
    Assert.assertTrue(dataFile.exists());

    // register the partition and drop it again, all within the same transaction
    newPartition.addPartition();
    Assert.assertNotNull(fileSet.getPartition(PARTITION_KEY));
    fileSet.dropPartition(PARTITION_KEY);

    tx.abort();
    // the file must be gone: the transaction was aborted, and the partition was dropped before the abort
    Assert.assertFalse(dataFile.exists());
}
Also used : PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) TransactionContext(org.apache.tephra.TransactionContext) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 79 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class PartitionedFileSetTest method testRollbackOfPartitionDelete.

/**
 * Creates a partition in a finished transaction, then in a second transaction drops it and
 * re-creates it with different content, aborts, and checks that the original partition and
 * its file content are back.
 */
@Test
public void testRollbackOfPartitionDelete() throws Exception {
    PartitionedFileSet fileSet = dsFrameworkUtil.getInstance(pfsInstance);
    TransactionContext tx = new TransactionContext(txClient, (TransactionAware) fileSet);

    // first transaction: create the partition with a file holding the single byte 1
    tx.start();
    PartitionOutput firstOutput = fileSet.getPartitionOutput(PARTITION_KEY);
    Location firstFile = firstOutput.getLocation().append("file");
    Assert.assertFalse(firstFile.exists());
    try (OutputStream out = firstFile.getOutputStream()) {
        out.write(1);
    }
    Assert.assertTrue(firstFile.exists());
    firstOutput.addPartition();
    Assert.assertNotNull(fileSet.getPartition(PARTITION_KEY));
    Assert.assertTrue(fileSet.getPartition(PARTITION_KEY).getLocation().exists());
    tx.finish();

    // second transaction: drop the partition; inside this transaction neither it nor its file is visible
    tx.start();
    fileSet.dropPartition(PARTITION_KEY);
    Assert.assertNull(fileSet.getPartition(PARTITION_KEY));
    Assert.assertFalse(firstFile.exists());

    // create a new partition with the same partition key (same relative path for the partition)
    PartitionOutput secondOutput = fileSet.getPartitionOutput(PARTITION_KEY);
    Location secondFile = secondOutput.getLocation().append("file");
    Assert.assertFalse(secondFile.exists());
    // this time the file holds the single byte 2
    try (OutputStream out = secondFile.getOutputStream()) {
        out.write(2);
    }
    Assert.assertTrue(secondFile.exists());
    secondOutput.addPartition();
    tx.abort();

    // the second transaction was aborted, so the original partition and its file must still exist
    tx.start();
    Assert.assertNotNull(fileSet.getPartition(PARTITION_KEY));
    Assert.assertTrue(firstFile.exists());
    try (InputStream in = firstFile.getInputStream()) {
        // expect the 1 written by the first partition, not the 2 written by the second one
        Assert.assertEquals(1, in.read());
        // and no further bytes after it
        Assert.assertEquals(0, in.available());
    }
    tx.finish();
}
Also used : PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) TransactionContext(org.apache.tephra.TransactionContext) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 80 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class ClicksAndViewsMapReduceTest method getDataFromFile.

/**
 * Reads every line of every {@code part-*} file in the output partition of the
 * {@link ClicksAndViews#JOINED} dataset.
 *
 * @return the distinct lines found across all part files
 * @throws Exception if the partition location cannot be listed or a file cannot be read
 */
private Set<String> getDataFromFile() throws Exception {
    DataSetManager<PartitionedFileSet> cleanRecords = getDataset(ClicksAndViews.JOINED);
    Set<String> cleanData = new HashSet<>();
    // we configured the MapReduce to write to this partition when starting it
    PartitionDetail partition = cleanRecords.get().getPartition(PartitionKey.builder().addLongField("runtime", OUTPUT_PARTITION_RUNTIME).build());
    Assert.assertNotNull(partition);
    for (Location location : partition.getLocation().list()) {
        // only the "part-" files are read; anything else in the directory is skipped
        if (!location.getName().startsWith("part-")) {
            continue;
        }
        // Decode explicitly as UTF-8 rather than with the platform default charset, so the result does
        // not depend on the JVM's locale. NOTE(review): assumes the job writes UTF-8 text - confirm.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(location.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                cleanData.add(line);
            }
        }
    }
    return cleanData;
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) HashSet(java.util.HashSet) Location(org.apache.twill.filesystem.Location)

Aggregations

Location (org.apache.twill.filesystem.Location)246 Test (org.junit.Test)104 IOException (java.io.IOException)57 File (java.io.File)39 LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)29 LocationFactory (org.apache.twill.filesystem.LocationFactory)29 FileSet (co.cask.cdap.api.dataset.lib.FileSet)28 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)27 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)23 CConfiguration (co.cask.cdap.common.conf.CConfiguration)19 NamespaceId (co.cask.cdap.proto.id.NamespaceId)19 Manifest (java.util.jar.Manifest)18 HashMap (java.util.HashMap)17 StreamId (co.cask.cdap.proto.id.StreamId)16 OutputStream (java.io.OutputStream)15 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)13 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)11 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)10 ArrayList (java.util.ArrayList)9 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)8