Use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.
The class AggregationFunctionsTest, method discreteValuesGenerateAggregationTest.
/**
 * Adds the values "a", "a" and "b" to a {@link DiscreteValuesHistogram} and checks that the
 * bytes returned by {@code aggregate()} decode (as JSON) to the counts {a=2, b=1}.
 */
@Test
public void discreteValuesGenerateAggregationTest() throws Exception {
  DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();

  // Two occurrences of "a" followed by a single "b", each wrapped in its own writable.
  for (String value : new String[] {"a", "a", "b"}) {
    DataQualityWritable writable = new DataQualityWritable();
    writable.set(new Text(value));
    histogram.add(writable);
  }

  Map<String, Integer> expectedCounts = Maps.newHashMap();
  expectedCounts.put("a", 2);
  expectedCounts.put("b", 1);

  // The aggregate is serialized; turn it back into a map before comparing.
  String serialized = Bytes.toString(histogram.aggregate());
  Map<String, Integer> actualCounts = GSON.fromJson(serialized, TOKEN_TYPE_MAP_STRING_INTEGER);

  Assert.assertEquals(expectedCounts, actualCounts);
}
Use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.
The class AggregationFunctionsTest, method discreteValuesHistogramReturnAggregationTest.
/**
 * Combines two JSON-serialized count maps into a {@link DiscreteValuesHistogram} and checks
 * that {@code retrieveAggregation()} returns the per-key sums {a=3, b=5}.
 */
@Test
public void discreteValuesHistogramReturnAggregationTest() throws Exception {
  Map<String, Integer> firstCounts = Maps.newHashMap();
  firstCounts.put("a", 1);
  firstCounts.put("b", 2);

  Map<String, Integer> secondCounts = Maps.newHashMap();
  secondCounts.put("a", 2);
  secondCounts.put("b", 3);

  // Expected result: counts of the two inputs added key by key.
  Map<String, Integer> expectedTotals = Maps.newHashMap();
  expectedTotals.put("a", 3);
  expectedTotals.put("b", 5);

  DiscreteValuesHistogram histogram = new DiscreteValuesHistogram();
  histogram.combine(Bytes.toBytes(GSON.toJson(firstCounts)));
  histogram.combine(Bytes.toBytes(GSON.toJson(secondCounts)));

  Map<String, Integer> actualTotals = histogram.retrieveAggregation();
  Assert.assertEquals(expectedTotals, actualTotals);
}
Use of co.cask.cdap.dq.functions.DiscreteValuesHistogram in project cdap by caskdata.
The class DataQualityAppTest, method testDefaultConfig.
/**
 * Deploys {@code DataQualityApp} configured to compute a "DiscreteValuesHistogram" for the
 * "content_length" field, runs the "FieldAggregator" MapReduce to completion, and then checks
 * the histogram stored in the "dataQuality" dataset.
 *
 * <p>The partial histograms found in every row whose key contains "content_length" are combined,
 * and the combined result is expected to be {"256" = 3}.
 */
@Test
public void testDefaultConfig() throws Exception {
  // Field name -> aggregation functions to apply to that field.
  Set<String> aggregationNames = new HashSet<>();
  aggregationNames.add("DiscreteValuesHistogram");
  Map<String, Set<String>> fieldAggregations = new HashMap<>();
  fieldAggregations.put("content_length", aggregationNames);

  DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(
    WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "dataQuality", fieldAggregations);

  ApplicationId appId = NamespaceId.DEFAULT.app("newApp");
  ArtifactSummary artifactSummary =
    new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion());
  AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(artifactSummary, config);
  ApplicationManager applicationManager = deployApplication(appId, appRequest);

  // Start the aggregation job and wait up to 180 seconds for it to complete.
  MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);

  Table logDataStore = (Table) getDataset("dataQuality").get();
  DiscreteValuesHistogram combinedHistogram = new DiscreteValuesHistogram();

  // Full-table scan; only rows for the "content_length" field contribute to the result.
  try (Scanner scanner = logDataStore.scan(null, null)) {
    for (Row row = scanner.next(); row != null; row = scanner.next()) {
      if (!Bytes.toString(row.getRow()).contains("content_length")) {
        continue;
      }
      byte[] serializedHistogram = row.getColumns().get(Bytes.toBytes("DiscreteValuesHistogram"));
      if (serializedHistogram != null) {
        combinedHistogram.combine(serializedHistogram);
      }
    }
  }

  Map<String, Integer> expectedMap = Maps.newHashMap();
  expectedMap.put("256", 3);

  Map<String, Integer> actualMap = combinedHistogram.retrieveAggregation();
  Assert.assertEquals(expectedMap, actualMap);
}
Aggregations