Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
From the class ClicksAndViewsMapReduceTest, method testClicksAndViews:
@Test
public void testClicksAndViews() throws Exception {
  ApplicationManager applicationManager = deployApplication(ClicksAndViews.class);
  // write each of the views to the VIEWS stream
  StreamManager viewsStream = getStreamManager(ClicksAndViews.VIEWS);
  for (String view : VIEWS) {
    viewsStream.send(view);
  }
  // send clicks for viewIds 1, 2, and 5
  StreamManager clicksStream = getStreamManager(ClicksAndViews.CLICKS);
  for (Integer click : CLICKS) {
    clicksStream.send(createClick(click));
  }
  MapReduceManager mapReduceManager =
    applicationManager.getMapReduceManager(ClicksAndViewsMapReduce.NAME)
      .start(ImmutableMap.of("output.partition.key.runtime", Integer.toString(OUTPUT_PARTITION_RUNTIME)));
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  List<String> joinedViews = new ArrayList<>();
  for (int i = 0; i < VIEWS.size(); i++) {
    joinedViews.add(createJoinedView(VIEWS.get(i), Collections.frequency(CLICKS, i)));
  }
  Set<String> expectedJoinedViews = ImmutableSet.copyOf(joinedViews);
  Assert.assertEquals(expectedJoinedViews, getDataFromFile());
  Assert.assertEquals(expectedJoinedViews, getDataFromExplore());
}
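The shape of this test recurs in every snippet below: deploy the application, feed input, start the MapReduce with runtime arguments, block until the run completes, then assert on the output. A minimal self-contained sketch of just that skeleton, where MyApp and "MyMapReduce" are hypothetical placeholder names, not part of the CDAP examples:

import java.util.concurrent.TimeUnit;
import com.google.common.collect.ImmutableMap;
import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.MapReduceManager;
import co.cask.cdap.test.TestBase;
import org.junit.Test;

public class MapReduceSkeletonSketch extends TestBase {
  @Test
  public void testRunToCompletion() throws Exception {
    // MyApp and "MyMapReduce" are hypothetical names
    ApplicationManager appManager = deployApplication(MyApp.class);
    MapReduceManager mrManager = appManager.getMapReduceManager("MyMapReduce")
      .start(ImmutableMap.of("my.runtime.arg", "some-value")); // args surface in the program's runtime context
    // fails the test if the run does not complete successfully within five minutes
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  }
}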
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
From the class DataQualityAppTest, method testDefaultConfig:
@Test
public void testDefaultConfig() throws Exception {
  Map<String, Set<String>> testMap = new HashMap<>();
  Set<String> testSet = new HashSet<>();
  testSet.add("DiscreteValuesHistogram");
  testMap.put("content_length", testSet);
  DataQualityApp.DataQualityConfig config =
    new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", testMap);
  ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
  AppRequest<DataQualityApp.DataQualityConfig> appRequest =
    new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
  ApplicationManager applicationManager = deployApplication(appId, appRequest);
  MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
  Table logDataStore = (Table) getDataset("dataQuality").get();
  DiscreteValuesHistogram discreteValuesHistogramAggregationFunction = new DiscreteValuesHistogram();
  Row row;
  try (Scanner scanner = logDataStore.scan(null, null)) {
    while ((row = scanner.next()) != null) {
      if (Bytes.toString(row.getRow()).contains("content_length")) {
        Map<byte[], byte[]> columnsMapBytes = row.getColumns();
        byte[] output = columnsMapBytes.get(Bytes.toBytes("DiscreteValuesHistogram"));
        if (output != null) {
          discreteValuesHistogramAggregationFunction.combine(output);
        }
      }
    }
  }
  Map<String, Integer> outputMap = discreteValuesHistogramAggregationFunction.retrieveAggregation();
  Map<String, Integer> expectedMap = Maps.newHashMap();
  expectedMap.put("256", 3);
  Assert.assertEquals(expectedMap, outputMap);
}
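The scan loop above is the standard read-back idiom for a Table dataset in tests: a full-range scan with null start and stop keys, held in try-with-resources so the scanner is always closed. The same idiom in isolation, with hypothetical dataset and column names:

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scanner;
import co.cask.cdap.api.dataset.table.Table;

// inside a TestBase subclass; "myTable" and "myColumn" are hypothetical names
Table table = (Table) getDataset("myTable").get();
try (Scanner scanner = table.scan(null, null)) { // null bounds scan every row
  Row row;
  while ((row = scanner.next()) != null) {
    byte[] value = row.get(Bytes.toBytes("myColumn"));
    // assert on the decoded value here
  }
}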
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
From the class StreamConversionTest, method testStreamConversion:
@Test
public void testStreamConversion() throws Exception {
  // deploy the StreamConversionApp application
  ApplicationManager appManager = deployApplication(StreamConversionApp.class);
  // send some data to the events stream
  StreamManager streamManager = getStreamManager("events");
  streamManager.send("15");
  streamManager.send("16");
  streamManager.send("17");
  // record the current time, adding 1 in case a stream event was assigned the same timestamp
  final long startTime = System.currentTimeMillis() + 1;
  // run the mapreduce
  MapReduceManager mapReduceManager = appManager.getMapReduceManager("StreamConversionMapReduce")
    .start(ImmutableMap.of("logical.start.time", Long.toString(startTime)));
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // verify the single partition in the file set
  DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("converted");
  Assert.assertNotNull(fileSetManager.get().getPartitionByTime(startTime));
  Calendar calendar = Calendar.getInstance();
  calendar.setTimeInMillis(startTime);
  int year = calendar.get(Calendar.YEAR);
  int month = calendar.get(Calendar.MONTH) + 1; // Calendar months are zero-based
  int day = calendar.get(Calendar.DAY_OF_MONTH);
  int hour = calendar.get(Calendar.HOUR_OF_DAY);
  int minute = calendar.get(Calendar.MINUTE);
  // query with SQL
  Connection connection = getQueryClient();
  ResultSet results = connection.prepareStatement(
    "SELECT year, month, day, hour, minute FROM dataset_converted WHERE body = '17'").executeQuery();
  // should return only one row, with the correct time fields
  Assert.assertTrue(results.next());
  Assert.assertEquals(year, results.getInt(1));
  Assert.assertEquals(month, results.getInt(2));
  Assert.assertEquals(day, results.getInt(3));
  Assert.assertEquals(hour, results.getInt(4));
  Assert.assertEquals(minute, results.getInt(5));
  Assert.assertFalse(results.next());
}
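Explore exposes the file set to SQL through a table named dataset_converted, following CDAP's convention of prefixing the dataset name with dataset_. The query step on its own, sketched with an arbitrary predicate and with the ResultSet closed via try-with-resources:

import java.sql.Connection;
import java.sql.ResultSet;

// inside a TestBase subclass; the WHERE clause is a hypothetical example
Connection connection = getQueryClient();
try (ResultSet results = connection
       .prepareStatement("SELECT body FROM dataset_converted WHERE body = '15'")
       .executeQuery()) {
  while (results.next()) {
    String body = results.getString(1); // JDBC columns are read by 1-based index
  }
}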
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
From the class TestFrameworkTestRun, method testMapperDatasetAccess:
@Category(SlowTests.class)
@Test
public void testMapperDatasetAccess() throws Exception {
  addDatasetInstance("keyValueTable", "table1");
  addDatasetInstance("keyValueTable", "table2");
  DataSetManager<KeyValueTable> tableManager = getDataset("table1");
  KeyValueTable inputTable = tableManager.get();
  inputTable.write("hello", "world");
  tableManager.flush();
  ApplicationManager appManager = deployApplication(DatasetWithMRApp.class);
  Map<String, String> argsForMR =
    ImmutableMap.of(DatasetWithMRApp.INPUT_KEY, "table1", DatasetWithMRApp.OUTPUT_KEY, "table2");
  MapReduceManager mrManager = appManager.getMapReduceManager(DatasetWithMRApp.MAPREDUCE_PROGRAM).start(argsForMR);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  appManager.stopAll();
  DataSetManager<KeyValueTable> outTableManager = getDataset("table2");
  verifyMapperJobOutput(DatasetWithMRApp.class, outTableManager);
}
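The final check delegates to verifyMapperJobOutput, a helper defined elsewhere in TestFrameworkTestRun. The underlying read-back is just a KeyValueTable lookup; a minimal sketch under the assumption (not confirmed by this snippet) that the MapReduce copies rows from table1 to table2 verbatim:

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.test.DataSetManager;
import org.junit.Assert;

// inside the TestBase subclass; assumes the program copied table1's rows to table2
DataSetManager<KeyValueTable> outTableManager = getDataset("table2");
KeyValueTable outputTable = outTableManager.get();
Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));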
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
From the class DynamicPartitioningTestRun, method testDynamicPartitioningMRWithFailure:
private void testDynamicPartitioningMRWithFailure(ApplicationManager appManager, String dsWithExistingPartition, String... outputs) throws Exception {
  // set up the output datasets
  String outputArg = "";
  for (String dataset : outputs) {
    outputArg += dataset + " ";
    try {
      deleteDatasetInstance(testSpace.dataset(dataset));
    } catch (InstanceNotFoundException e) {
      // may be expected; the test framework has no truncate(), so delete-and-recreate is used instead
    }
    addDatasetInstance(PartitionedFileSet.class.getName(), testSpace.dataset(dataset),
      PartitionedFileSetProperties.builder()
        .setPartitioning(PARTITIONING)
        .setEnableExploreOnCreate(true)
        .setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class)
        // SEPERATOR is Hadoop's own (misspelled) constant name
        .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",")
        .setExploreFormat("csv")
        .setExploreSchema("key string, value string")
        .build());
  }
  outputArg = outputArg.trim();
  // create partition (x="1") in one of the outputs
  DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dsWithExistingPartition));
  Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path");
  OutputStream os = loc.append("part1").getOutputStream();
  try (Writer writer = new OutputStreamWriter(os)) {
    writer.write("1,x\n");
  }
  pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path");
  pfs.flush();
  validatePartitions(dsWithExistingPartition, true);
  Map<String, String> arguments = ImmutableMap.of("outputs", outputArg);
  final MapReduceManager mrManager = appManager.getMapReduceManager("DynamicPartitioningMR");
  final Set<RunRecord> oldRunRecords = new HashSet<>(mrManager.getHistory());
  mrManager.start(arguments);
  // wait for the new run record to appear and finish running
  final AtomicReference<RunRecord> lastRunRecord = new AtomicReference<>();
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      Set<RunRecord> runRecords = Sets.difference(new HashSet<>(mrManager.getHistory()), oldRunRecords);
      if (runRecords.isEmpty()) {
        return false;
      }
      // get the last run record
      RunRecord runRecord = Iterables.getFirst(runRecords, null);
      if (runRecord != null && runRecord.getStatus() != ProgramRunStatus.RUNNING) {
        lastRunRecord.set(runRecord);
      }
      return lastRunRecord.get() != null;
    }
  }, 5, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  for (String dataset : outputs) {
    validatePartitions(dataset, dataset.equals(dsWithExistingPartition));
    validateFiles(dataset, dataset.equals(dsWithExistingPartition) ? loc : null);
  }
}
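The run-record diffing above detects whichever terminal state the new run reaches. When the test expects exactly one run and knows it should fail, a simpler alternative (assuming no concurrent runs of the same program) is to wait for the FAILED status directly, mirroring the waitForRun(COMPLETED, ...) calls in the other snippets:

import java.util.concurrent.TimeUnit;
import com.google.common.collect.ImmutableMap;
import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.test.MapReduceManager;

// sketch: wait for the expected failure instead of diffing run-record sets
MapReduceManager mrManager = appManager.getMapReduceManager("DynamicPartitioningMR");
mrManager.start(ImmutableMap.of("outputs", outputArg));
mrManager.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);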