Search in sources :

Example 1 with DiscreteValuesHistogram

use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.

the class AggregationFunctionsTest method discreteValuesGenerateAggregationTest.

/**
 * Adds the values "a", "a" and "b" to a {@link DiscreteValuesHistogram} and verifies that the
 * JSON-decoded output of {@code aggregate()} maps "a" to 2 and "b" to 1.
 */
@Test
public void discreteValuesGenerateAggregationTest() throws Exception {
    // Expected frequency of each distinct value fed in below.
    Map<String, Integer> expectedCounts = Maps.newHashMap();
    expectedCounts.put("a", 2);
    expectedCounts.put("b", 1);

    DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();
    for (String value : new String[] { "a", "a", "b" }) {
        DataQualityWritable writable = new DataQualityWritable();
        writable.set(new Text(value));
        histogram.add(writable);
    }

    // aggregate() yields bytes; they are decoded as a JSON string-to-integer map for comparison.
    String aggregatedJson = Bytes.toString(histogram.aggregate());
    Map<String, Integer> actualCounts = GSON.fromJson(aggregatedJson, TOKEN_TYPE_MAP_STRING_INTEGER);
    Assert.assertEquals(expectedCounts, actualCounts);
}
Also used : DiscreteValuesHistogram(co.cask.cdap.dq.functions.DiscreteValuesHistogram) Text(org.apache.hadoop.io.Text) DataQualityWritable(co.cask.cdap.dq.DataQualityWritable) Test(org.junit.Test)

Example 2 with DiscreteValuesHistogram

use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.

the class AggregationFunctionsTest method discreteValuesHistogramReturnAggregationTest.

/**
 * Combines two JSON-serialized partial histograms ({a=1, b=2} and {a=2, b=3}) into a
 * {@link DiscreteValuesHistogram} and verifies that {@code retrieveAggregation()} returns the
 * per-key sums {a=3, b=5}.
 */
@Test
public void discreteValuesHistogramReturnAggregationTest() throws Exception {
    Map<String, Integer> firstPartial = Maps.newHashMap();
    firstPartial.put("a", 1);
    firstPartial.put("b", 2);

    Map<String, Integer> secondPartial = Maps.newHashMap();
    secondPartial.put("a", 2);
    secondPartial.put("b", 3);

    // Per-key sum of the two partial maps above.
    Map<String, Integer> expectedTotals = Maps.newHashMap();
    expectedTotals.put("a", 3);
    expectedTotals.put("b", 5);

    DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();
    // combine() is fed each partial map as the bytes of its JSON form.
    histogram.combine(Bytes.toBytes(GSON.toJson(firstPartial)));
    histogram.combine(Bytes.toBytes(GSON.toJson(secondPartial)));

    Assert.assertEquals(expectedTotals, histogram.retrieveAggregation());
}
Also used : DiscreteValuesHistogram(co.cask.cdap.dq.functions.DiscreteValuesHistogram) Test(org.junit.Test)

Example 3 with DiscreteValuesHistogram

use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.

the class DataQualityAppTest method testDefaultConfig.

/**
 * Deploys the data quality application configured to run "DiscreteValuesHistogram" on the
 * "content_length" field, runs the "FieldAggregator" MapReduce to completion, then combines
 * every "DiscreteValuesHistogram" column found in "content_length" rows of the "dataQuality"
 * dataset and verifies that the combined histogram is {"256"=3}.
 */
@Test
public void testDefaultConfig() throws Exception {
    // Field name -> names of the aggregation functions to apply to it.
    Set<String> aggregationNames = new HashSet<>();
    aggregationNames.add("DiscreteValuesHistogram");
    Map<String, Set<String>> fieldAggregations = new HashMap<>();
    fieldAggregations.put("content_length", aggregationNames);

    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(
        WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", fieldAggregations);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
    ArtifactSummary artifactSummary =
        new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion());
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(artifactSummary, config);

    ApplicationManager applicationManager = deployApplication(appId, appRequest);
    MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);

    Table logDataStore = (Table) getDataset("dataQuality").get();
    DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();
    // Column under which each row's histogram bytes are looked up.
    byte[] histogramColumn = Bytes.toBytes("DiscreteValuesHistogram");

    try (Scanner scanner = logDataStore.scan(null, null)) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            // Only rows whose key mentions the configured field are relevant.
            if (!Bytes.toString(row.getRow()).contains("content_length")) {
                continue;
            }
            Map<byte[], byte[]> columns = row.getColumns();
            byte[] partialHistogram = columns.get(histogramColumn);
            if (partialHistogram == null) {
                continue;
            }
            histogram.combine(partialHistogram);
        }
    }

    Map<String, Integer> expectedCounts = Maps.newHashMap();
    expectedCounts.put("256", 3);
    Assert.assertEquals(expectedCounts, histogram.retrieveAggregation());
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ApplicationManager(co.cask.cdap.test.ApplicationManager) Set(java.util.Set) HashSet(java.util.HashSet) Table(co.cask.cdap.api.dataset.table.Table) MapReduceManager(co.cask.cdap.test.MapReduceManager) HashMap(java.util.HashMap) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ArtifactSummary(co.cask.cdap.api.artifact.ArtifactSummary) DiscreteValuesHistogram(co.cask.cdap.dq.functions.DiscreteValuesHistogram) Row(co.cask.cdap.api.dataset.table.Row) ApplicationId(co.cask.cdap.proto.id.ApplicationId) DataQualityApp(co.cask.cdap.dq.DataQualityApp) Test(org.junit.Test)

Aggregations

DiscreteValuesHistogram (co.cask.cdap.dq.functions.DiscreteValuesHistogram)3 Test (org.junit.Test)3 ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary)1 Row (co.cask.cdap.api.dataset.table.Row)1 Scanner (co.cask.cdap.api.dataset.table.Scanner)1 Table (co.cask.cdap.api.dataset.table.Table)1 DataQualityApp (co.cask.cdap.dq.DataQualityApp)1 DataQualityWritable (co.cask.cdap.dq.DataQualityWritable)1 AppRequest (co.cask.cdap.proto.artifact.AppRequest)1 ApplicationId (co.cask.cdap.proto.id.ApplicationId)1 ApplicationManager (co.cask.cdap.test.ApplicationManager)1 MapReduceManager (co.cask.cdap.test.MapReduceManager)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 Text (org.apache.hadoop.io.Text)1