Use of org.apache.twill.filesystem.Location in project cdap by caskdata.
The class SparkTestRun, method testSparkWithGetDataset.
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);

  DataSetManager<FileSet> filesetManager = getDataset("logs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareInputFileSetWithLogData(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
  args.put("input", "logs");
  args.put("output", "logStats");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
  KeyValueTable logStatsTable = logStatsManager.get();
  validateGetDatasetOutput(logStatsTable);

  // Cleanup after run
  location.delete(true);
  logStatsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    while (scan.hasNext()) {
      logStatsTable.delete(scan.next().getKey());
    }
  }
  logStatsManager.flush();
}
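The test relies on a small set of Location operations: resolving a child Location from the FileSet by name, writing input into it, and deleting it recursively afterwards. A minimal, hypothetical sketch of a helper like prepareInputFileSetWithLogData, assuming it simply streams newline-separated log lines into the Location; the actual content written by the test is not shown here.

// Hypothetical sketch: write a few log lines into the input Location.
// The specific lines below are an assumption, not the test's real input.
private void prepareInputFileSetWithLogData(Location location) throws IOException {
  try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(location.getOutputStream(), StandardCharsets.UTF_8))) {
    writer.println("level=INFO component=spark message=start");
    writer.println("level=ERROR component=spark message=failure");
  }
}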
Use of org.apache.twill.filesystem.Location in project cdap by caskdata.
The class SparkTestRun, method testStreamFormatSpec.
@Test
public void testStreamFormatSpec() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  StreamManager stream = getStreamManager("PeopleStream");
  stream.send("Old Man,50");
  stream.send("Baby,1");
  stream.send("Young Guy,18");
  stream.send("Small Kid,5");
  stream.send("Legal Drinker,21");

  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "output");

  Map<String, String> runtimeArgs = new HashMap<>();
  runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "PeopleFileSet", outputArgs));
  runtimeArgs.put("stream.name", "PeopleStream");
  runtimeArgs.put("output.dataset", "PeopleFileSet");
  runtimeArgs.put("sql.statement", "SELECT name, age FROM people WHERE age >= 21");

  List<String> programs = Arrays.asList(ScalaStreamFormatSpecSpark.class.getSimpleName(),
                                        StreamFormatSpecSpark.class.getSimpleName());
  for (String sparkProgramName : programs) {
    // Clean the output before starting
    DataSetManager<FileSet> fileSetManager = getDataset("PeopleFileSet");
    Location outputDir = fileSetManager.get().getLocation("output");
    outputDir.delete(true);

    SparkManager sparkManager = appManager.getSparkManager(sparkProgramName);
    sparkManager.start(runtimeArgs);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);

    // Find the output part file. There is only one because the program repartitions to 1.
    Location outputFile = Iterables.find(outputDir.list(), new Predicate<Location>() {
      @Override
      public boolean apply(Location input) {
        return input.getName().startsWith("part-r-");
      }
    });

    // Verify the result
    List<String> lines = CharStreams.readLines(
      CharStreams.newReaderSupplier(Locations.newInputSupplier(outputFile), Charsets.UTF_8));
    Map<String, Integer> result = new HashMap<>();
    for (String line : lines) {
      String[] parts = line.split(":");
      result.put(parts[0], Integer.parseInt(parts[1]));
    }
    Assert.assertEquals(ImmutableMap.of("Old Man", 50, "Legal Drinker", 21), result);
  }
}
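The part-file lookup above uses Guava's Iterables.find with an anonymous Predicate over Location.list(). On Java 8 the same lookup can be written more compactly; this is only an alternative sketch, not part of the original test.

// Java 8 alternative to the Iterables.find call above: Location.list() returns the
// child Locations of the output directory, and we keep the single "part-r-" file.
Location outputFile = outputDir.list().stream()
  .filter(loc -> loc.getName().startsWith("part-r-"))
  .findFirst()
  .orElseThrow(() -> new IllegalStateException("No part file found in " + outputDir));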
Use of org.apache.twill.filesystem.Location in project cdap by caskdata.
The class DynamicPartitionerWithAvroTest, method runDynamicPartitionerMapReduce.
private void runDynamicPartitionerMapReduce(final List<? extends GenericRecord> records,
                                            boolean allowConcurrentWriters,
                                            boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);

  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);

  // write values to the input kvTable
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the keys are not used; it matters that they're unique though
      for (int i = 0; i < records.size(); i++) {
        kvTable.write(Integer.toString(i), records.get(i).toString());
      }
    }
  });

  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
  // run the partition writer m/r with this output partition time
  ImmutableMap<String, String> arguments = ImmutableMap.of(OUTPUT_PARTITION_KEY, Long.toString(now),
                                                           allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
  long startTime = System.currentTimeMillis();
  boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                              new BasicArguments(arguments));
  Assert.assertEquals(expectedStatus, status);

  if (!expectedStatus) {
    // if we expect the program to fail, no need to check the output data for expected results
    return;
  }

  // Verify notifications
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));

  // this should have created a partition in the pfs
  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();

  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws IOException {
      Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
      for (PartitionDetail partition : pfs.getPartitions(null)) {
        partitions.put(partition.getPartitionKey(), partition);
        // check that the mapreduce wrote the output partition metadata to all the output partitions
        Assert.assertEquals(AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.METADATA,
                            partition.getMetadata().asMap());
      }
      Assert.assertEquals(3, partitions.size());
      Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());

      // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
      for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
        PartitionDetail partitionDetail = partitionKeyEntry.getValue();
        String relativePath = partitionDetail.getRelativePath();
        int zip = (int) partitionKeyEntry.getKey().getField("zip");
        Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
        Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
      }

      for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
        Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
        Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
      }
    }
  });
}
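Each partition's data is read back through its Location in the final assertion. A hypothetical sketch of a helper like readOutput, assuming the partition directory contains Avro data files readable with the standard GenericDatumReader; the file naming convention is an assumption and the test's actual helper is not shown here.

// Hypothetical sketch: collect all GenericRecords from the ".avro" files under the
// given partition Location.
private Set<GenericRecord> readOutput(Location location) throws IOException {
  Set<GenericRecord> records = new HashSet<>();
  for (Location file : location.list()) {
    if (file.getName().endsWith(".avro")) {
      try (DataFileStream<GenericRecord> stream =
             new DataFileStream<>(file.getInputStream(), new GenericDatumReader<GenericRecord>())) {
        for (GenericRecord record : stream) {
          records.add(record);
        }
      }
    }
  }
  return records;
}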
Use of org.apache.twill.filesystem.Location in project cdap by caskdata.
The class RemotePrivilegesTest, method setup.
@BeforeClass
public static void setup() throws IOException, InterruptedException {
  CConfiguration cConf = CConfiguration.create();
  cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMPORARY_FOLDER.newFolder().getAbsolutePath());
  cConf.setBoolean(Constants.Security.ENABLED, true);
  cConf.setBoolean(Constants.Security.KERBEROS_ENABLED, false);
  cConf.setBoolean(Constants.Security.Authorization.ENABLED, true);
  cConf.setInt(Constants.Security.Authorization.CACHE_MAX_ENTRIES, 10000);
  cConf.setInt(Constants.Security.Authorization.CACHE_TTL_SECS, CACHE_TIMEOUT);

  Manifest manifest = new Manifest();
  manifest.getMainAttributes().put(Attributes.Name.MAIN_CLASS, InMemoryAuthorizer.class.getName());
  LocationFactory locationFactory = new LocalLocationFactory(TEMPORARY_FOLDER.newFolder());
  Location externalAuthJar = AppJarHelper.createDeploymentJar(locationFactory, InMemoryAuthorizer.class, manifest);
  cConf.set(Constants.Security.Authorization.EXTENSION_JAR_PATH, externalAuthJar.toString());

  Injector injector = AppFabricTestHelper.getInjector(cConf);
  discoveryService = injector.getInstance(DiscoveryServiceClient.class);
  appFabricServer = injector.getInstance(AppFabricServer.class);
  appFabricServer.startAndWait();
  waitForService(Constants.Service.APP_FABRIC_HTTP);
  authorizationEnforcer = injector.getInstance(RemoteAuthorizationEnforcer.class);
  privilegesManager = injector.getInstance(PrivilegesManager.class);
}
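The waitForService call blocks until the named service is discoverable before the test proceeds. A sketch of what such a helper can look like using CDAP's discovery utilities; the exact implementation in the test is not shown, and the five-second timeout below is an assumption.

// Sketch: poll the discovery service until an endpoint for the given service is available.
private static void waitForService(String service) {
  EndpointStrategy endpointStrategy = new RandomEndpointStrategy(discoveryService.discover(service));
  Preconditions.checkNotNull(endpointStrategy.pick(5, TimeUnit.SECONDS),
                             "%s service is not up after 5 seconds", service);
}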
Use of org.apache.twill.filesystem.Location in project cdap by caskdata.
The class AuthorizationBootstrapperTest, method setup.
@BeforeClass
public static void setup() throws Exception {
  CConfiguration cConf = CConfiguration.create();
  cConf.set(Constants.CFG_LOCAL_DATA_DIR, TMP_FOLDER.newFolder().getAbsolutePath());
  cConf.setBoolean(Constants.Security.ENABLED, true);
  cConf.setBoolean(Constants.Security.KERBEROS_ENABLED, false);
  cConf.setBoolean(Constants.Security.Authorization.ENABLED, true);
  Location deploymentJar = AppJarHelper.createDeploymentJar(new LocalLocationFactory(TMP_FOLDER.newFolder()),
                                                            InMemoryAuthorizer.class);
  cConf.set(Constants.Security.Authorization.EXTENSION_JAR_PATH, deploymentJar.toURI().getPath());
  // make Alice an admin user, so she can create namespaces
  cConf.set(Constants.Security.Authorization.ADMIN_USERS, ADMIN_USER.getName());
  instanceId = new InstanceId(cConf.get(Constants.INSTANCE_NAME));

  // setup a system artifact
  File systemArtifactsDir = TMP_FOLDER.newFolder();
  cConf.set(Constants.AppFabric.SYSTEM_ARTIFACTS_DIR, systemArtifactsDir.getAbsolutePath());
  createSystemArtifact(systemArtifactsDir);

  Injector injector = Guice.createInjector(new AppFabricTestModule(cConf));
  namespaceQueryAdmin = injector.getInstance(NamespaceQueryAdmin.class);
  namespaceAdmin = injector.getInstance(NamespaceAdmin.class);
  defaultNamespaceEnsurer = new DefaultNamespaceEnsurer(namespaceAdmin);
  discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
  txManager = injector.getInstance(TransactionManager.class);
  datasetService = injector.getInstance(DatasetService.class);
  systemArtifactLoader = injector.getInstance(SystemArtifactLoader.class);
  authorizationBootstrapper = injector.getInstance(AuthorizationBootstrapper.class);
  artifactRepository = injector.getInstance(ArtifactRepository.class);
  dsFramework = injector.getInstance(DatasetFramework.class);
  authorizationEnforcer = injector.getInstance(AuthorizationEnforcer.class);
}
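The createSystemArtifact call places an artifact jar into the configured system artifacts directory so it can be registered when the system artifacts are loaded. A hypothetical sketch under that assumption; the application class SomeSystemApp and the jar file name below are placeholders, not what the test actually uses.

// Hypothetical sketch: build a jar from some application class and drop it into the
// system artifacts directory under a versioned file name.
private static void createSystemArtifact(File systemArtifactsDir) throws IOException {
  LocationFactory locationFactory = new LocalLocationFactory(TMP_FOLDER.newFolder());
  Location artifactJar = AppJarHelper.createDeploymentJar(locationFactory, SomeSystemApp.class);
  File target = new File(systemArtifactsDir, "some-system-app-1.0.0.jar");
  try (InputStream in = artifactJar.getInputStream()) {
    java.nio.file.Files.copy(in, target.toPath());
  }
}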