
Example 16 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

The class ClicksAndViewsMapReduceTest, method testClicksAndViews.

@Test
public void testClicksAndViews() throws Exception {
    ApplicationManager applicationManager = deployApplication(ClicksAndViews.class);
    // write each of the views to the VIEWS stream
    StreamManager viewsStream = getStreamManager(ClicksAndViews.VIEWS);
    for (String view : VIEWS) {
        viewsStream.send(view);
    }
    // send clicks for viewIds 1,2,5
    StreamManager clicksStream = getStreamManager(ClicksAndViews.CLICKS);
    for (Integer click : CLICKS) {
        clicksStream.send(createClick(click));
    }
    MapReduceManager mapReduceManager = applicationManager
        .getMapReduceManager(ClicksAndViewsMapReduce.NAME)
        .start(ImmutableMap.of("output.partition.key.runtime",
                               Integer.toString(OUTPUT_PARTITION_RUNTIME)));
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    List<String> joinedViews = new ArrayList<>();
    for (int i = 0; i < VIEWS.size(); i++) {
        joinedViews.add(createJoinedView(VIEWS.get(i), Collections.frequency(CLICKS, i)));
    }
    Set<String> expectedJoinedViews = ImmutableSet.copyOf(joinedViews);
    Assert.assertEquals(expectedJoinedViews, getDataFromFile());
    Assert.assertEquals(expectedJoinedViews, getDataFromExplore());
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), StreamManager (co.cask.cdap.test.StreamManager), ArrayList (java.util.ArrayList), Test (org.junit.Test)
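The helpers createClick, createJoinedView, getDataFromFile, and getDataFromExplore are defined elsewhere in ClicksAndViewsMapReduceTest and are not shown on this page. As a rough sketch only, the two formatting helpers could look like the following; the tab-separated record layout is an assumption for illustration, not taken from the source:

    // Hypothetical sketches; the real helpers live in ClicksAndViewsMapReduceTest.
    private String createClick(int viewId) {
        // a click event referencing the view it belongs to (field layout assumed)
        return viewId + "\t" + "click";
    }

    private String createJoinedView(String view, int numClicks) {
        // the MapReduce appends each view's click count to its record (layout assumed)
        return view + "\t" + numClicks;
    }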

Example 17 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

The class DataQualityAppTest, method testDefaultConfig.

@Test
public void testDefaultConfig() throws Exception {
    Map<String, Set<String>> testMap = new HashMap<>();
    Set<String> testSet = new HashSet<>();
    testSet.add("DiscreteValuesHistogram");
    testMap.put("content_length", testSet);
    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(
        WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", testMap);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(
        new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
    ApplicationManager applicationManager = deployApplication(appId, appRequest);
    MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
    Table logDataStore = (Table) getDataset("dataQuality").get();
    DiscreteValuesHistogram discreteValuesHistogramAggregationFunction = new DiscreteValuesHistogram();
    Row row;
    try (Scanner scanner = logDataStore.scan(null, null)) {
        while ((row = scanner.next()) != null) {
            if (Bytes.toString(row.getRow()).contains("content_length")) {
                Map<byte[], byte[]> columnsMapBytes = row.getColumns();
                byte[] output = columnsMapBytes.get(Bytes.toBytes("DiscreteValuesHistogram"));
                if (output != null) {
                    discreteValuesHistogramAggregationFunction.combine(output);
                }
            }
        }
    }
    Map<String, Integer> outputMap = discreteValuesHistogramAggregationFunction.retrieveAggregation();
    Map<String, Integer> expectedMap = Maps.newHashMap();
    expectedMap.put("256", 3);
    Assert.assertEquals(expectedMap, outputMap);
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), ApplicationManager (co.cask.cdap.test.ApplicationManager), Set (java.util.Set), HashSet (java.util.HashSet), Table (co.cask.cdap.api.dataset.table.Table), MapReduceManager (co.cask.cdap.test.MapReduceManager), HashMap (java.util.HashMap), AppRequest (co.cask.cdap.proto.artifact.AppRequest), ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary), DiscreteValuesHistogram (co.cask.cdap.dq.functions.DiscreteValuesHistogram), Row (co.cask.cdap.api.dataset.table.Row), ApplicationId (co.cask.cdap.proto.id.ApplicationId), DataQualityApp (co.cask.cdap.dq.DataQualityApp), Test (org.junit.Test)
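DiscreteValuesHistogram.combine(byte[]) folds a serialized per-partition histogram into the running aggregate, and retrieveAggregation() returns the merged value-to-count map. To illustrate the merge semantics only (this is not the actual co.cask.cdap.dq.functions implementation, which operates on serialized byte[] blobs), a deserialized equivalent would be:

    // Illustration of the merge semantics, assuming already-deserialized maps.
    static Map<String, Integer> mergeHistograms(List<Map<String, Integer>> partials) {
        Map<String, Integer> merged = new HashMap<>();
        for (Map<String, Integer> partial : partials) {
            for (Map.Entry<String, Integer> e : partial.entrySet()) {
                merged.merge(e.getKey(), e.getValue(), Integer::sum);
            }
        }
        return merged;
    }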

Example 18 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

The class StreamConversionTest, method testStreamConversion.

@Test
public void testStreamConversion() throws Exception {
    // deploy the StreamConversionApp application
    ApplicationManager appManager = deployApplication(StreamConversionApp.class);
    // send some data to the events stream
    StreamManager streamManager = getStreamManager("events");
    streamManager.send("15");
    streamManager.send("16");
    streamManager.send("17");
    // record the current time. Add 1 in case the stream events are added with the same timestamp as the current time.
    final long startTime = System.currentTimeMillis() + 1;
    // run the mapreduce
    MapReduceManager mapReduceManager = appManager
        .getMapReduceManager("StreamConversionMapReduce")
        .start(ImmutableMap.of("logical.start.time", Long.toString(startTime)));
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // verify the single partition in the file set
    DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("converted");
    Assert.assertNotNull(fileSetManager.get().getPartitionByTime(startTime));
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeInMillis(startTime);
    int year = calendar.get(Calendar.YEAR);
    int month = calendar.get(Calendar.MONTH) + 1;
    int day = calendar.get(Calendar.DAY_OF_MONTH);
    int hour = calendar.get(Calendar.HOUR_OF_DAY);
    int minute = calendar.get(Calendar.MINUTE);
    // query with SQL
    Connection connection = getQueryClient();
    ResultSet results = connection
        .prepareStatement("SELECT year, month, day, hour, minute "
                          + "FROM dataset_converted WHERE body = '17'")
        .executeQuery();
    // should return only one row, with correct time fields
    Assert.assertTrue(results.next());
    Assert.assertEquals(year, results.getInt(1));
    Assert.assertEquals(month, results.getInt(2));
    Assert.assertEquals(day, results.getInt(3));
    Assert.assertEquals(hour, results.getInt(4));
    Assert.assertEquals(minute, results.getInt(5));
    Assert.assertFalse(results.next());
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), StreamManager (co.cask.cdap.test.StreamManager), Calendar (java.util.Calendar), Connection (java.sql.Connection), ResultSet (java.sql.ResultSet), TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet), Test (org.junit.Test)
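The partition can also be verified through the TimePartitionedFileSet API rather than SQL. A minimal sketch, assuming a CDAP version in which getPartitionByTime returns a TimePartitionDetail (co.cask.cdap.api.dataset.lib.TimePartitionDetail) exposing the partition's Location:

    // sketch: inspect the partition through the dataset API instead of Explore
    TimePartitionedFileSet converted = fileSetManager.get();
    TimePartitionDetail partition = converted.getPartitionByTime(startTime);
    Assert.assertNotNull(partition);
    for (Location file : partition.getLocation().list()) {
        // files written by StreamConversionMapReduce for this partition
        System.out.println("partition file: " + file.getName());
    }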

Example 19 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

The class TestFrameworkTestRun, method testMapperDatasetAccess.

@Category(SlowTests.class)
@Test
public void testMapperDatasetAccess() throws Exception {
    addDatasetInstance("keyValueTable", "table1");
    addDatasetInstance("keyValueTable", "table2");
    DataSetManager<KeyValueTable> tableManager = getDataset("table1");
    KeyValueTable inputTable = tableManager.get();
    inputTable.write("hello", "world");
    tableManager.flush();
    ApplicationManager appManager = deployApplication(DatasetWithMRApp.class);
    Map<String, String> argsForMR = ImmutableMap.of(DatasetWithMRApp.INPUT_KEY, "table1", DatasetWithMRApp.OUTPUT_KEY, "table2");
    MapReduceManager mrManager = appManager.getMapReduceManager(DatasetWithMRApp.MAPREDUCE_PROGRAM).start(argsForMR);
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    appManager.stopAll();
    DataSetManager<KeyValueTable> outTableManager = getDataset("table2");
    verifyMapperJobOutput(DatasetWithMRApp.class, outTableManager);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
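verifyMapperJobOutput is a shared helper defined elsewhere in TestFrameworkTestRun. A plausible sketch of the kind of assertion it makes, assuming DatasetWithMRApp's mapper copies key/value pairs from table1 to table2 unchanged (an assumption, not confirmed by this page):

    // hypothetical sketch of the output check; the real helper is not shown here
    KeyValueTable outputTable = outTableManager.get();
    Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));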

Example 20 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

The class DynamicPartitioningTestRun, method testDynamicPartitioningMRWithFailure.

private void testDynamicPartitioningMRWithFailure(ApplicationManager appManager, String dsWithExistingPartition, String... outputs) throws Exception {
    // set up the output datasets
    String outputArg = "";
    for (String dataset : outputs) {
        outputArg += dataset + " ";
        try {
            deleteDatasetInstance(testSpace.dataset(dataset));
        } catch (InstanceNotFoundException e) {
        // may be expected. I wish the test framework had truncate()
        }
        addDatasetInstance(PartitionedFileSet.class.getName(), testSpace.dataset(dataset),
            PartitionedFileSetProperties.builder()
                .setPartitioning(PARTITIONING)
                .setEnableExploreOnCreate(true)
                .setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class)
                // note: SEPERATOR (sic) is the actual constant name in Hadoop's TextOutputFormat
                .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",")
                .setExploreFormat("csv")
                .setExploreSchema("key string, value string")
                .build());
    }
    outputArg = outputArg.trim();
    // create partition (x="1") in one of the outputs
    DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dsWithExistingPartition));
    Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path");
    OutputStream os = loc.append("part1").getOutputStream();
    try (Writer writer = new OutputStreamWriter(os)) {
        writer.write("1,x\n");
    }
    pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path");
    pfs.flush();
    validatePartitions(dsWithExistingPartition, true);
    Map<String, String> arguments = ImmutableMap.of("outputs", outputArg);
    final MapReduceManager mrManager = appManager.getMapReduceManager("DynamicPartitioningMR");
    final Set<RunRecord> oldRunRecords = new HashSet<>(mrManager.getHistory());
    mrManager.start(arguments);
    // Wait for the new run record to appear and finished running.
    final AtomicReference<RunRecord> lastRunRecord = new AtomicReference<>();
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            Set<RunRecord> runRecords = Sets.difference(new HashSet<>(mrManager.getHistory()), oldRunRecords);
            if (runRecords.isEmpty()) {
                return false;
            }
            // at most one new run was started, so grab it from the difference set
            RunRecord runRecord = Iterables.getFirst(runRecords, null);
            if (runRecord != null && runRecord.getStatus() != ProgramRunStatus.RUNNING) {
                lastRunRecord.set(runRecord);
            }
            return lastRunRecord.get() != null;
        }
    }, 5, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    for (String dataset : outputs) {
        validatePartitions(dataset, dataset.equals(dsWithExistingPartition));
        validateFiles(dataset, dataset.equals(dsWithExistingPartition) ? loc : null);
    }
}
Also used: HashSet (java.util.HashSet), ResultSet (java.sql.ResultSet), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), Set (java.util.Set), MapReduceManager (co.cask.cdap.test.MapReduceManager), InstanceNotFoundException (co.cask.cdap.api.dataset.InstanceNotFoundException), OutputStream (java.io.OutputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), RunRecord (co.cask.cdap.proto.RunRecord), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer), Location (org.apache.twill.filesystem.Location)
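The anonymous Callable above predates heavy Java 8 use; assuming the project compiles with Java 8 or later, the same polling loop can be written as a lambda:

    // same wait logic as above, expressed as a lambda
    Tasks.waitFor(true, () -> {
        Set<RunRecord> runRecords =
            Sets.difference(new HashSet<>(mrManager.getHistory()), oldRunRecords);
        if (runRecords.isEmpty()) {
            return false;
        }
        RunRecord runRecord = Iterables.getFirst(runRecords, null);
        if (runRecord != null && runRecord.getStatus() != ProgramRunStatus.RUNNING) {
            lastRunRecord.set(runRecord);
        }
        return lastRunRecord.get() != null;
    }, 5, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);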

Aggregations

MapReduceManager (co.cask.cdap.test.MapReduceManager)24 ApplicationManager (co.cask.cdap.test.ApplicationManager)22 Test (org.junit.Test)21 ServiceManager (co.cask.cdap.test.ServiceManager)13 StreamManager (co.cask.cdap.test.StreamManager)9 URL (java.net.URL)9 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)8 SparkManager (co.cask.cdap.test.SparkManager)5 HttpResponse (co.cask.common.http.HttpResponse)5 Location (org.apache.twill.filesystem.Location)5 ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary)4 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)4 FlowManager (co.cask.cdap.test.FlowManager)4 HashMap (java.util.HashMap)4 HashSet (java.util.HashSet)4 Set (java.util.Set)4 DataQualityApp (co.cask.cdap.dq.DataQualityApp)3 AppRequest (co.cask.cdap.proto.artifact.AppRequest)3 ApplicationId (co.cask.cdap.proto.id.ApplicationId)3 HttpRequest (co.cask.common.http.HttpRequest)3