Search in sources:

Example 1 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From the class SparkTestRun, method testSparkWithGetDataset:

private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
    ApplicationManager applicationManager = deploy(appClass);
    DataSetManager<FileSet> filesetManager = getDataset("logs");
    FileSet fileset = filesetManager.get();
    Location location = fileset.getLocation("nn");
    prepareInputFileSetWithLogData(location);
    Map<String, String> inputArgs = new HashMap<>();
    FileSetArguments.setInputPath(inputArgs, "nn");
    Map<String, String> args = new HashMap<>();
    args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
    args.put("input", "logs");
    args.put("output", "logStats");
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
    KeyValueTable logStatsTable = logStatsManager.get();
    validateGetDatasetOutput(logStatsTable);
    // Cleanup after run
    location.delete(true);
    logStatsManager.flush();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
        while (scan.hasNext()) {
            logStatsTable.delete(scan.next().getKey());
        }
    }
    logStatsManager.flush();
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), KeyValue (co.cask.cdap.api.dataset.lib.KeyValue), FileSet (co.cask.cdap.api.dataset.lib.FileSet), IdentityHashMap (java.util.IdentityHashMap), HashMap (java.util.HashMap), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), Location (org.apache.twill.filesystem.Location)
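
The interplay of FileSetArguments and RuntimeArguments.addScope above is worth unpacking. A minimal sketch, assuming CDAP's scoped-argument convention of prefixing each key with "<scope>.<name>." (the same convention Example 3 below builds by hand with "dataset." + OUTPUT_DATASET + "."); the exact key name written by setInputPath is illustrative:

Map<String, String> inputArgs = new HashMap<>();
FileSetArguments.setInputPath(inputArgs, "nn");
// addScope returns a new map whose keys carry the "dataset.logs." prefix,
// so these arguments reach only the dataset named "logs" at runtime
Map<String, String> scoped = RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs);
for (Map.Entry<String, String> entry : scoped.entrySet()) {
    // prints something like: dataset.logs.input.paths = nn (key name illustrative)
    System.out.println(entry.getKey() + " = " + entry.getValue());
}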

Example 2 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From the class SparkTestRun, method testStreamFormatSpec:

@Test
public void testStreamFormatSpec() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    StreamManager stream = getStreamManager("PeopleStream");
    stream.send("Old Man,50");
    stream.send("Baby,1");
    stream.send("Young Guy,18");
    stream.send("Small Kid,5");
    stream.send("Legal Drinker,21");
    Map<String, String> outputArgs = new HashMap<>();
    FileSetArguments.setOutputPath(outputArgs, "output");
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "PeopleFileSet", outputArgs));
    runtimeArgs.put("stream.name", "PeopleStream");
    runtimeArgs.put("output.dataset", "PeopleFileSet");
    runtimeArgs.put("sql.statement", "SELECT name, age FROM people WHERE age >= 21");
    List<String> programs = Arrays.asList(ScalaStreamFormatSpecSpark.class.getSimpleName(), StreamFormatSpecSpark.class.getSimpleName());
    for (String sparkProgramName : programs) {
        // Clean the output before starting
        DataSetManager<FileSet> fileSetManager = getDataset("PeopleFileSet");
        Location outputDir = fileSetManager.get().getLocation("output");
        outputDir.delete(true);
        SparkManager sparkManager = appManager.getSparkManager(sparkProgramName);
        sparkManager.start(runtimeArgs);
        sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
        // Find the output part file. There is only one because the program repartitions to 1
        Location outputFile = Iterables.find(outputDir.list(), new Predicate<Location>() {

            @Override
            public boolean apply(Location input) {
                return input.getName().startsWith("part-r-");
            }
        });
        // Verify the result
        List<String> lines = CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputFile), Charsets.UTF_8));
        Map<String, Integer> result = new HashMap<>();
        for (String line : lines) {
            String[] parts = line.split(":");
            result.put(parts[0], Integer.parseInt(parts[1]));
        }
        Assert.assertEquals(ImmutableMap.of("Old Man", 50, "Legal Drinker", 21), result);
    }
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), FileSet (co.cask.cdap.api.dataset.lib.FileSet), IdentityHashMap (java.util.IdentityHashMap), HashMap (java.util.HashMap), ScalaStreamFormatSpecSpark (co.cask.cdap.spark.app.ScalaStreamFormatSpecSpark), StreamFormatSpecSpark (co.cask.cdap.spark.app.StreamFormatSpecSpark), StreamManager (co.cask.cdap.test.StreamManager), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)
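
CharStreams.newReaderSupplier and Locations.newInputSupplier work here, but Guava's InputSupplier family was deprecated in later releases. A sketch of an equivalent read built only on Location.getInputStream() and try-with-resources (assumes the part file is UTF-8 text, as above; needs java.io and java.nio.charset imports):

List<String> lines = new ArrayList<>();
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(outputFile.getInputStream(), StandardCharsets.UTF_8))) {
    String line;
    // read the whole part file line by line
    while ((line = reader.readLine()) != null) {
        lines.add(line);
    }
}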

Example 3 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From the class DynamicPartitionerWithAvroTest, method runDynamicPartitionerMapReduce:

private void runDynamicPartitionerMapReduce(final List<? extends GenericRecord> records, boolean allowConcurrentWriters, boolean expectedStatus) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
    final long now = System.currentTimeMillis();
    final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);
    // write values to the input kvTable
    final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
    Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // the keys themselves are unused; they only need to be unique
            for (int i = 0; i < records.size(); i++) {
                kvTable.write(Integer.toString(i), records.get(i).toString());
            }
        }
    });
    String allowConcurrencyKey = "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
    // run the partition writer m/r with this output partition time
    ImmutableMap<String, String> arguments = ImmutableMap.of(OUTPUT_PARTITION_KEY, Long.toString(now), allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
    long startTime = System.currentTimeMillis();
    boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class, new BasicArguments(arguments));
    Assert.assertEquals(expectedStatus, status);
    if (!expectedStatus) {
        // if we expect the program to fail, no need to check the output data for expected results
        return;
    }
    // Verify notifications
    List<Notification> notifications = getDataNotifications(startTime);
    Assert.assertEquals(1, notifications.size());
    Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET), DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));
    // the run should have created partitions in the PFS (three, as asserted below)
    final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
    final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws IOException {
            Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
            for (PartitionDetail partition : pfs.getPartitions(null)) {
                partitions.put(partition.getPartitionKey(), partition);
                // check that the mapreduce wrote the output partition metadata to all the output partitions
                Assert.assertEquals(AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.METADATA, partition.getMetadata().asMap());
            }
            Assert.assertEquals(3, partitions.size());
            Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());
            // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
            for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
                PartitionDetail partitionDetail = partitionKeyEntry.getValue();
                String relativePath = partitionDetail.getRelativePath();
                int zip = (int) partitionKeyEntry.getKey().getField("zip");
                Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
                Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
            }
            for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
                Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
                Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
            }
        }
    });
}
Also used: HashSet (java.util.HashSet), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), Set (java.util.Set), PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail), Notification (co.cask.cdap.proto.Notification), ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms), BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments), GenericRecord (org.apache.avro.generic.GenericRecord), TransactionExecutor (org.apache.tephra.TransactionExecutor), IOException (java.io.IOException), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), TransactionAware (org.apache.tephra.TransactionAware), PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey), HashMap (java.util.HashMap), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Location (org.apache.twill.filesystem.Location)
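
The relative-path assertions rest on a property of the Twill API: Location.append derives a child Location from a base without touching the filesystem, and two Locations for the same underlying path compare equal, which is what the assertEquals above relies on. A compressed sketch of that arithmetic (the zip value 10001 is hypothetical):

Location base = pfs.getEmbeddedFileSet().getBaseLocation();
// this app lays partitions out as <outputPartitionTime>/<zip>
String relativePath = Long.toString(now) + Path.SEPARATOR + 10001;  // hypothetical zip code
// append only derives the child Location; the test compares it
// directly against partitionDetail.getLocation()
Location expected = base.append(relativePath);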

Example 4 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From the class RemotePrivilegesTest, method setup:

@BeforeClass
public static void setup() throws IOException, InterruptedException {
    CConfiguration cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMPORARY_FOLDER.newFolder().getAbsolutePath());
    cConf.setBoolean(Constants.Security.ENABLED, true);
    cConf.setBoolean(Constants.Security.KERBEROS_ENABLED, false);
    cConf.setBoolean(Constants.Security.Authorization.ENABLED, true);
    cConf.setInt(Constants.Security.Authorization.CACHE_MAX_ENTRIES, 10000);
    cConf.setInt(Constants.Security.Authorization.CACHE_TTL_SECS, CACHE_TIMEOUT);
    Manifest manifest = new Manifest();
    manifest.getMainAttributes().put(Attributes.Name.MAIN_CLASS, InMemoryAuthorizer.class.getName());
    LocationFactory locationFactory = new LocalLocationFactory(TEMPORARY_FOLDER.newFolder());
    Location externalAuthJar = AppJarHelper.createDeploymentJar(locationFactory, InMemoryAuthorizer.class, manifest);
    cConf.set(Constants.Security.Authorization.EXTENSION_JAR_PATH, externalAuthJar.toString());
    Injector injector = AppFabricTestHelper.getInjector(cConf);
    discoveryService = injector.getInstance(DiscoveryServiceClient.class);
    appFabricServer = injector.getInstance(AppFabricServer.class);
    appFabricServer.startAndWait();
    waitForService(Constants.Service.APP_FABRIC_HTTP);
    authorizationEnforcer = injector.getInstance(RemoteAuthorizationEnforcer.class);
    privilegesManager = injector.getInstance(PrivilegesManager.class);
}
Also used: DiscoveryServiceClient (org.apache.twill.discovery.DiscoveryServiceClient), RemoteAuthorizationEnforcer (co.cask.cdap.security.authorization.RemoteAuthorizationEnforcer), InMemoryAuthorizer (co.cask.cdap.security.authorization.InMemoryAuthorizer), Injector (com.google.inject.Injector), AppFabricServer (co.cask.cdap.internal.app.services.AppFabricServer), PrivilegesManager (co.cask.cdap.security.spi.authorization.PrivilegesManager), Manifest (java.util.jar.Manifest), CConfiguration (co.cask.cdap.common.conf.CConfiguration), LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory), LocationFactory (org.apache.twill.filesystem.LocationFactory), Location (org.apache.twill.filesystem.Location), BeforeClass (org.junit.BeforeClass)
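
Both this setup and the one in Example 5 reduce to the same pattern: root a LocationFactory at a scratch directory, build a jar at a derived Location, and hand its path to CConfiguration. A self-contained sketch of that Location lifecycle, assuming a /tmp scratch directory and a hypothetical file name:

// assumes org.apache.twill.filesystem.{LocalLocationFactory, LocationFactory, Location}
LocationFactory factory = new LocalLocationFactory(new File("/tmp"));  // assumed scratch dir
Location jar = factory.create("auth-extension.jar");                   // hypothetical name
try (OutputStream os = jar.getOutputStream()) {
    os.write("placeholder, not a real jar".getBytes(StandardCharsets.UTF_8));
}
// the Location now exists on the local filesystem and can be addressed by URI
System.out.println(jar.exists());  // true
System.out.println(jar.toURI());   // file:/tmp/auth-extension.jar
jar.delete();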

Example 5 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From the class AuthorizationBootstrapperTest, method setup:

@BeforeClass
public static void setup() throws Exception {
    CConfiguration cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TMP_FOLDER.newFolder().getAbsolutePath());
    cConf.setBoolean(Constants.Security.ENABLED, true);
    cConf.setBoolean(Constants.Security.KERBEROS_ENABLED, false);
    cConf.setBoolean(Constants.Security.Authorization.ENABLED, true);
    Location deploymentJar = AppJarHelper.createDeploymentJar(new LocalLocationFactory(TMP_FOLDER.newFolder()), InMemoryAuthorizer.class);
    cConf.set(Constants.Security.Authorization.EXTENSION_JAR_PATH, deploymentJar.toURI().getPath());
    // make Alice an admin user, so she can create namespaces
    cConf.set(Constants.Security.Authorization.ADMIN_USERS, ADMIN_USER.getName());
    instanceId = new InstanceId(cConf.get(Constants.INSTANCE_NAME));
    // setup a system artifact
    File systemArtifactsDir = TMP_FOLDER.newFolder();
    cConf.set(Constants.AppFabric.SYSTEM_ARTIFACTS_DIR, systemArtifactsDir.getAbsolutePath());
    createSystemArtifact(systemArtifactsDir);
    Injector injector = Guice.createInjector(new AppFabricTestModule(cConf));
    namespaceQueryAdmin = injector.getInstance(NamespaceQueryAdmin.class);
    namespaceAdmin = injector.getInstance(NamespaceAdmin.class);
    defaultNamespaceEnsurer = new DefaultNamespaceEnsurer(namespaceAdmin);
    discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
    txManager = injector.getInstance(TransactionManager.class);
    datasetService = injector.getInstance(DatasetService.class);
    systemArtifactLoader = injector.getInstance(SystemArtifactLoader.class);
    authorizationBootstrapper = injector.getInstance(AuthorizationBootstrapper.class);
    artifactRepository = injector.getInstance(ArtifactRepository.class);
    dsFramework = injector.getInstance(DatasetFramework.class);
    authorizationEnforcer = injector.getInstance(AuthorizationEnforcer.class);
}
Also used: DiscoveryServiceClient (org.apache.twill.discovery.DiscoveryServiceClient), InstanceId (co.cask.cdap.proto.id.InstanceId), NamespaceAdmin (co.cask.cdap.common.namespace.NamespaceAdmin), DatasetService (co.cask.cdap.data2.datafabric.dataset.service.DatasetService), ArtifactRepository (co.cask.cdap.internal.app.runtime.artifact.ArtifactRepository), AuthorizationEnforcer (co.cask.cdap.security.spi.authorization.AuthorizationEnforcer), DefaultNamespaceEnsurer (co.cask.cdap.internal.app.namespace.DefaultNamespaceEnsurer), CConfiguration (co.cask.cdap.common.conf.CConfiguration), DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework), AuthorizationBootstrapper (co.cask.cdap.security.authorization.AuthorizationBootstrapper), Injector (com.google.inject.Injector), TransactionManager (org.apache.tephra.TransactionManager), SystemArtifactLoader (co.cask.cdap.internal.app.runtime.artifact.SystemArtifactLoader), NamespaceQueryAdmin (co.cask.cdap.common.namespace.NamespaceQueryAdmin), AppFabricTestModule (co.cask.cdap.internal.guice.AppFabricTestModule), LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory), File (java.io.File), Location (org.apache.twill.filesystem.Location), BeforeClass (org.junit.BeforeClass)
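
One subtlety distinguishes this setup from Example 4: there the extension jar path is set with externalAuthJar.toString(), here with deploymentJar.toURI().getPath(). The latter always yields a plain, scheme-less filesystem path; toString() is implementation-defined (for local Locations it is typically the URI form). A short sketch of the contrast, using an assumed base directory:

Location jar = new LocalLocationFactory(new File("/tmp")).create("deployment.jar");
String plainPath = jar.toURI().getPath();  // "/tmp/deployment.jar", no scheme
String asString = jar.toString();          // implementation-defined; often the full URI form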

Aggregations

Location (org.apache.twill.filesystem.Location): 272
Test (org.junit.Test): 110
IOException (java.io.IOException): 67
File (java.io.File): 45
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 32
LocationFactory (org.apache.twill.filesystem.LocationFactory): 32
LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory): 31
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 27
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 27
CConfiguration (co.cask.cdap.common.conf.CConfiguration): 20
HashMap (java.util.HashMap): 20
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 19
Manifest (java.util.jar.Manifest): 18
StreamId (co.cask.cdap.proto.id.StreamId): 17
ArrayList (java.util.ArrayList): 15
DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework): 13
OutputStream (java.io.OutputStream): 13
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11
ApplicationManager (co.cask.cdap.test.ApplicationManager): 11
HashSet (java.util.HashSet): 11