use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class SparkPageRankAppTest method test.
@Test
public void test() throws Exception {
// Deploy the SparkPageRankApp
ApplicationManager appManager = deployApplication(SparkPageRankApp.class);
// Send stream events to the Stream
StreamManager streamManager = getStreamManager(SparkPageRankApp.BACKLINK_URL_STREAM);
streamManager.send(Joiner.on(" ").join(URL_1, URL_2));
streamManager.send(Joiner.on(" ").join(URL_1, URL_3));
streamManager.send(Joiner.on(" ").join(URL_2, URL_1));
streamManager.send(Joiner.on(" ").join(URL_3, URL_1));
// Start service
ServiceManager serviceManager = appManager.getServiceManager(SparkPageRankApp.SERVICE_HANDLERS).start();
// Wait for service to start since the Spark program needs it
serviceManager.waitForStatus(true);
// Start the SparkPageRankProgram
SparkManager sparkManager = appManager.getSparkManager(SparkPageRankApp.PageRankSpark.class.getSimpleName()).start();
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
// Run RanksCounter, which counts the number of pages per page rank
MapReduceManager mapReduceManager = appManager.getMapReduceManager(SparkPageRankApp.RanksCounter.class.getSimpleName()).start();
mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
// Query for rank
URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), SparkPageRankApp.SparkPageRankServiceHandler.RANKS_PATH);
HttpRequest request = HttpRequest.post(url).withBody(("{\"" + SparkPageRankApp.SparkPageRankServiceHandler.URL_KEY + "\":\"" + URL_1 + "\"}")).build();
HttpResponse response = HttpRequests.execute(request);
Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
Assert.assertEquals(RANK, response.getResponseBodyAsString());
// Request total pages for a page rank and verify it
url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), SparkPageRankApp.SparkPageRankServiceHandler.TOTAL_PAGES_PATH + "/" + RANK);
response = HttpRequests.execute(HttpRequest.get(url).build());
Assert.assertEquals(TOTAL_PAGES, response.getResponseBodyAsString());
}
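For context, the service endpoints the test calls are defined in SparkPageRankApp's handler, which is not part of this snippet. The following is a minimal sketch of what the rank endpoint might look like, assuming the ranks live in a KeyValueTable named "ranks"; the dataset name, paths, and JSON key are illustrative assumptions, not the actual source.
// Hypothetical sketch of the handler side: a POST to the rank path reads the
// page URL from the JSON body and responds with its stored rank.
public static class SparkPageRankServiceHandler extends AbstractHttpServiceHandler {
  public static final String RANKS_PATH = "rank";
  public static final String URL_KEY = "url";

  @UseDataSet("ranks") // assumed dataset mapping url -> rank
  private KeyValueTable ranks;

  @POST
  @Path(RANKS_PATH)
  public void getRank(HttpServiceRequest request, HttpServiceResponder responder) {
    // Request body has the form {"url":"http://example.com/"}
    String body = Charsets.UTF_8.decode(request.getContent()).toString();
    String url = new Gson().fromJson(body, JsonObject.class).get(URL_KEY).getAsString();
    byte[] rank = ranks.read(Bytes.toBytes(url));
    if (rank == null) {
      responder.sendError(HttpURLConnection.HTTP_NOT_FOUND, "No rank found for " + url);
    } else {
      responder.sendString(Bytes.toString(rank));
    }
  }
}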
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class SportResultsTest method testPartitionedCounting.
@Test
public void testPartitionedCounting() throws Exception {
// deploy the application and start the upload service
ApplicationManager appManager = deployApplication(SportResults.class);
ServiceManager serviceManager = appManager.getServiceManager("UploadService").start();
serviceManager.waitForStatus(true);
// upload a few dummy results
URL url = serviceManager.getServiceURL();
uploadResults(url, "fantasy", 2014, FANTASY_2014);
uploadResults(url, "fantasy", 2015, FANTASY_2015);
uploadResults(url, "critters", 2014, CRITTERS_2014);
// start a map/reduce that counts all seasons for the fantasy league
MapReduceManager mrManager = appManager.getMapReduceManager("ScoreCounter").start(ImmutableMap.of("league", "fantasy"));
// wait up to 5 minutes for the run to complete; it should finish much faster, though
mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// validate the output by reading directly from the file set
DataSetManager<PartitionedFileSet> dataSetManager = getDataset("totals");
PartitionedFileSet totals = dataSetManager.get();
PartitionDetail partitionDetail = totals.getPartition(PartitionKey.builder().addStringField("league", "fantasy").build());
Assert.assertNotNull(partitionDetail);
Location location = partitionDetail.getLocation();
// find the part file that has the actual results
Assert.assertTrue(location.isDirectory());
for (Location file : location.list()) {
if (file.getName().startsWith("part")) {
location = file;
}
}
// validate each line against the expected totals, closing the reader when done
Map<String, String[]> expected = ImmutableMap.of(
  "My Team", new String[] { "My Team", "2", "0", "1", "53", "65" },
  "Your Team", new String[] { "Your Team", "1", "0", "2", "63", "60" },
  "Other Team", new String[] { "Other Team", "1", "0", "1", "40", "31" });
try (BufferedReader reader = new BufferedReader(new InputStreamReader(location.getInputStream(), "UTF-8"))) {
  String line;
  while ((line = reader.readLine()) != null) {
    String[] fields = line.split(",");
    Assert.assertArrayEquals(expected.get(fields[0]), fields);
  }
}
// verify the results using SQL
Connection connection = getQueryClient();
ResultSet results = connection.prepareStatement("SELECT wins, ties, losses, scored, conceded " + "FROM totals WHERE team = 'My Team' AND league = 'fantasy'").executeQuery();
// should return only one row, with the correct stat fields
Assert.assertTrue(results.next());
Assert.assertEquals(2, results.getInt(1));
Assert.assertEquals(0, results.getInt(2));
Assert.assertEquals(1, results.getInt(3));
Assert.assertEquals(53, results.getInt(4));
Assert.assertEquals(65, results.getInt(5));
Assert.assertFalse(results.next());
}
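The uploadResults helper invoked above is not part of this snippet. A plausible sketch, assuming the UploadService accepts a PUT to a leagues/{league}/seasons/{season} path (the path is an assumption):
// Hypothetical sketch of the helper used above: PUT the raw results to the
// UploadService and assert a successful response. The endpoint path is assumed.
private void uploadResults(URL serviceUrl, String league, int season, String content) throws Exception {
  URL url = new URL(serviceUrl, String.format("leagues/%s/seasons/%d", league, season));
  HttpResponse response = HttpRequests.execute(HttpRequest.put(url).withBody(content).build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
}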
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class PartitionRollbackTestRun method testPFSRollback.
/*
* This tests all the following cases:
*
* 1. addPartition(location) fails because partition already exists
* 2. addPartition(location) fails because Hive partition already exists
* 3. addPartition(location) succeeds but transaction fails
* 4. partitionOutput.addPartition() fails because partition already exists
* 5. partitionOutput.addPartition() fails because Hive partition already exists
* 6. partitionOutput.addPartition() succeeds but transaction fails
* 7. mapreduce writing partition fails because location already exists
* 8. mapreduce writing partition fails because partition already exists
* 9. mapreduce writing partition fails because Hive partition already exists
* 10. mapreduce writing dynamic partition fails because location already exists
* 11. mapreduce writing dynamic partition fails because partition already exists
* 12. mapreduce writing dynamic partition fails because Hive partition already exists
* 13. multi-output mapreduce writing partition fails because location already exists
* 13a. first output fails, the other output must roll back partitions 0 and 5
* 13b. second output fails, the first output must roll back partitions 0 and 5
* 14. multi-output mapreduce writing partition fails because partition already exists
* 14a. first output fails, the other output must roll back partition 5
* 14b. second output fails, the first output must roll back partition 5
* 15. multi-output mapreduce writing partition fails because Hive partition already exists
* 15a. first output fails, the other output must roll back partitions 0 and 5
* 15b. second output fails, the first output must roll back partitions 0 and 5
*
* For all these cases, we validate that existing files and partitions are preserved, and newly
* added files and partitions are rolled back.
*/
@Test
public void testPFSRollback() throws Exception {
ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
int numRuns = 0;
Validator pfsValidator = new Validator(PFS);
Validator otherValidator = new Validator(OTHER);
final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
final PartitionedFileSet pfs = pfsManager.get();
final PartitionedFileSet other = otherValidator.getPfsManager().get();
final String path3 = pfsValidator.getRelativePath3();
// 1. addPartition(location) fails because partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_1, path3);
}
});
Assert.fail("Expected tx to fail because partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 2. addPartition(location) fails because Hive partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_4, path3);
}
});
Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 3. addPartition(location) succeeds but transaction fails
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_3, path3);
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 4. partitionOutput.addPartition() fails because partition already exists
final PartitionOutput output2x = pfs.getPartitionOutput(KEY_2);
final Location location2x = output2x.getLocation();
try (Writer writer = new OutputStreamWriter(location2x.append("file").getOutputStream())) {
writer.write("2x,2x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output2x.addPartition();
}
});
Assert.fail("Expected tx to fail because partition for number=2 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location2x.exists());
// 5. partitionOutput.addPartition() fails because Hive partition already exists
final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
final Location location4x = output4x.getLocation();
try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
writer.write("4x,4x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output4x.addPartition();
}
});
Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location4x.exists());
// 6. partitionOutput.addPartition() succeeds but transaction fails
final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
final Location location5x = output5x.getLocation();
try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
writer.write("5x,5x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output5x.addPartition();
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location5x.exists());
// 7. mapreduce writing partition fails because location already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
// 8. mapreduce writing partition fails because partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
// 9. mapreduce writing partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
// 10. mapreduce writing dynamic partition fails because location already exists
mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 11. mapreduce writing dynamic partition fails because partition already exists
mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 12. mapreduce writing dynamic partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 13. multi-output mapreduce writing partition fails because location already exists
// 13a. first output fails, the other output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 13b. second output fails, the first output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 14. multi-output mapreduce writing partition fails because partition already exists
// 14a. first output fails, the other output must roll back partition 5
// TODO: bring this back when CDAP-8766 is fixed
/*
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
*/
// 14b. second output fails, the first output must roll back partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 15. multi-output mapreduce writing partition fails because Hive partition already exists
// 15a. first output fails, the other output must roll back partitions 0 and 5
// TODO: bring this back when CDAP-8766 is fixed
/*
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 15b. second output fails, first output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
*/
}
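The anonymous Runnable blocks above predate Java 8 style; with a lambda, each transactional step reads more compactly. Case 3, for example, becomes:
// Same transaction as case 3, written with a Java 8 lambda instead of an
// anonymous Runnable. Behavior is identical: the partition is added, the
// runtime exception aborts the transaction, and the add is rolled back.
try {
  pfsManager.execute(() -> {
    pfs.addPartition(KEY_3, path3);
    throw new RuntimeException("fail the tx");
  });
  Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
  // expected
}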
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class MapReduceServiceIntegrationTestRun method test.
@Test
public void test() throws Exception {
ApplicationManager applicationManager = deployApplication(TestMapReduceServiceIntegrationApp.class);
ServiceManager serviceManager = applicationManager.getServiceManager(TestMapReduceServiceIntegrationApp.SERVICE_NAME).start();
serviceManager.waitForStatus(true);
DataSetManager<MyKeyValueTableDefinition.KeyValueTable> inDataSet = getDataset(TestMapReduceServiceIntegrationApp.INPUT_DATASET);
inDataSet.get().write("key1", "Two words");
inDataSet.get().write("key2", "Plus three words");
inDataSet.flush();
MapReduceManager mrManager = applicationManager.getMapReduceManager(TestMapReduceServiceIntegrationApp.MR_NAME).start();
mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
DataSetManager<MyKeyValueTableDefinition.KeyValueTable> outDataSet = getDataset(TestMapReduceServiceIntegrationApp.OUTPUT_DATASET);
MyKeyValueTableDefinition.KeyValueTable results = outDataSet.get();
String total = results.get(TestMapReduceServiceIntegrationApp.SQUARED_TOTAL_WORDS_COUNT);
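// the two inputs contain 2 + 3 = 5 words in total; the squared total is 5 * 5 = 25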
Assert.assertEquals(25, Integer.parseInt(total));
}
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class BatchStreamIntegrationTestRun method verifyStreamBatchJob.
private void verifyStreamBatchJob(StreamManager streamManager, ApplicationManager applicationManager, String mapReduceName, int timeout) throws Exception {
for (int i = 0; i < 50; i++) {
streamManager.send(String.valueOf(i));
}
MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(mapReduceName).start();
mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, timeout, TimeUnit.SECONDS);
// The MR job simply turns every stream event body into key/value pairs, with key==value.
DataSetManager<KeyValueTable> datasetManager = getDataset("results");
KeyValueTable results = datasetManager.get();
for (int i = 0; i < 50; i++) {
byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
Assert.assertArrayEquals(key, results.read(key));
}
}
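The map/reduce program itself is not part of this snippet. A minimal sketch of the mapper the comment above describes, assuming stream events are decoded to (timestamp, body) pairs and the job writes byte[] key/value pairs to the "results" KeyValueTable:
// Plausible sketch (not the actual CDAP source) of the mapper: every stream
// event body becomes a key/value pair with key == value.
public static class StreamToPairsMapper extends Mapper<LongWritable, Text, byte[], byte[]> {
  @Override
  protected void map(LongWritable timestamp, Text body, Context context)
      throws IOException, InterruptedException {
    byte[] bytes = body.copyBytes();
    context.write(bytes, bytes); // key == value, as the test asserts
  }
}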