Use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in the Apache Accumulo project.
Class EntryLengthSummarizersTest, method testAll.
/*
 * Miscellaneous test: summarizes six entries whose row, family, qualifier, visibility and value
 * strings have varying lengths, then compares everything the collector reports with an expected
 * map.
 */
@Test
public void testAll() {
  SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
  Collector lengths = new EntryLengthSummarizer().collector(config);

  // Feed the entries in a fixed order; the fifth one carries an empty value.
  lengths.accept(new Key("maximumnoqualifier", "f1", "q", "vis1"), new Value("v1"));
  lengths.accept(new Key("minKey", "fam2", "q2", "visibility2"), new Value("value2"));
  lengths.accept(new Key("row3", "f3", "qualifier3", "v3"), new Value("val3"));
  lengths.accept(new Key("r4", "family4", "qual4", "vis4"), new Value("fourthvalue"));
  lengths.accept(new Key("fifthrow", "thirdfamily", "q5", "v5"), new Value(""));
  lengths.accept(new Key("r6", "sixthfamily", "qual6", "visibi6"), new Value("value6"));

  HashMap<String, Long> actual = new HashMap<>();
  lengths.summarize(actual::put);

  HashMap<String, Long> want = new HashMap<>();

  // key statistics and log2 histogram
  want.put("key.min", 18L);
  want.put("key.max", 25L);
  want.put("key.sum", 132L);
  want.put("key.logHist.4", 2L);
  want.put("key.logHist.5", 4L);

  // row statistics and log2 histogram
  want.put("row.min", 2L);
  want.put("row.max", 18L);
  want.put("row.sum", 40L);
  want.put("row.logHist.1", 2L);
  want.put("row.logHist.2", 1L);
  want.put("row.logHist.3", 2L);
  want.put("row.logHist.4", 1L);

  // family statistics and log2 histogram
  want.put("family.min", 2L);
  want.put("family.max", 11L);
  want.put("family.sum", 37L);
  want.put("family.logHist.1", 2L);
  want.put("family.logHist.2", 1L);
  want.put("family.logHist.3", 3L);

  // qualifier statistics and log2 histogram
  want.put("qualifier.min", 1L);
  want.put("qualifier.max", 10L);
  want.put("qualifier.sum", 25L);
  want.put("qualifier.logHist.0", 1L);
  want.put("qualifier.logHist.1", 2L);
  want.put("qualifier.logHist.2", 2L);
  want.put("qualifier.logHist.3", 1L);

  // visibility statistics and log2 histogram
  want.put("visibility.min", 2L);
  want.put("visibility.max", 11L);
  want.put("visibility.sum", 30L);
  want.put("visibility.logHist.1", 2L);
  want.put("visibility.logHist.2", 2L);
  want.put("visibility.logHist.3", 2L);

  // value statistics and log2 histogram
  want.put("value.min", 0L);
  want.put("value.max", 11L);
  want.put("value.sum", 29L);
  want.put("value.logHist.0", 1L);
  want.put("value.logHist.1", 1L);
  want.put("value.logHist.2", 1L);
  want.put("value.logHist.3", 3L);

  // number of entries accepted
  want.put("total", 6L);

  Assert.assertEquals(want, actual);
}
Use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in the Apache Accumulo project.
Class EntryLengthSummarizersTest, method testBasicRow.
/*
 * Row-only test: summarizes three entries built from a two-character row and an empty value,
 * then compares everything the collector reports with an expected map.
 */
@Test
public void testBasicRow() {
  SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
  Collector lengths = new EntryLengthSummarizer().collector(config);

  // Each entry is a key made from the row alone, paired with an empty value.
  for (String row : new String[] {"r1", "r2", "r3"}) {
    lengths.accept(new Key(row), new Value(""));
  }

  HashMap<String, Long> actual = new HashMap<>();
  lengths.summarize(actual::put);

  HashMap<String, Long> want = new HashMap<>();

  // key statistics and log2 histogram
  want.put("key.min", 2L);
  want.put("key.max", 2L);
  want.put("key.sum", 6L);
  want.put("key.logHist.1", 3L);

  // row statistics and log2 histogram
  want.put("row.min", 2L);
  want.put("row.max", 2L);
  want.put("row.sum", 6L);
  want.put("row.logHist.1", 3L);

  // family statistics and log2 histogram
  want.put("family.min", 0L);
  want.put("family.max", 0L);
  want.put("family.sum", 0L);
  want.put("family.logHist.0", 3L);

  // qualifier statistics and log2 histogram
  want.put("qualifier.min", 0L);
  want.put("qualifier.max", 0L);
  want.put("qualifier.sum", 0L);
  want.put("qualifier.logHist.0", 3L);

  // visibility statistics and log2 histogram
  want.put("visibility.min", 0L);
  want.put("visibility.max", 0L);
  want.put("visibility.sum", 0L);
  want.put("visibility.logHist.0", 3L);

  // value statistics and log2 histogram
  want.put("value.min", 0L);
  want.put("value.max", 0L);
  want.put("value.sum", 0L);
  want.put("value.logHist.0", 3L);

  // number of entries accepted
  want.put("total", 3L);

  Assert.assertEquals(want, actual);
}
Use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in the Apache Accumulo project.
Class EntryLengthSummarizersTest, method testComplexValue.
/*
 * Complex-value test: summarizes three fully populated entries (row, family, qualifier,
 * visibility and a non-empty value), then compares everything the collector reports with an
 * expected map.
 */
@Test
public void testComplexValue() {
  SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
  Collector lengths = new EntryLengthSummarizer().collector(config);

  lengths.accept(new Key("r1", "family1", "columnQualifier1", "v1"), new Value("v1"));
  lengths.accept(new Key("row2", "columnFamily2", "q2", "visibility2"), new Value("value2"));
  lengths.accept(new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"), new Value("keyValue3"));

  HashMap<String, Long> actual = new HashMap<>();
  lengths.summarize(actual::put);

  HashMap<String, Long> want = new HashMap<>();

  // key statistics and log2 histogram
  want.put("key.min", 27L);
  want.put("key.max", 39L);
  want.put("key.sum", 96L);
  want.put("key.logHist.5", 3L);

  // row statistics and log2 histogram
  want.put("row.min", 2L);
  want.put("row.max", 10L);
  want.put("row.sum", 16L);
  want.put("row.logHist.1", 1L);
  want.put("row.logHist.2", 1L);
  want.put("row.logHist.3", 1L);

  // family statistics and log2 histogram
  want.put("family.min", 2L);
  want.put("family.max", 13L);
  want.put("family.sum", 22L);
  want.put("family.logHist.1", 1L);
  want.put("family.logHist.3", 1L);
  want.put("family.logHist.4", 1L);

  // qualifier statistics and log2 histogram
  want.put("qualifier.min", 2L);
  want.put("qualifier.max", 16L);
  want.put("qualifier.sum", 28L);
  want.put("qualifier.logHist.1", 1L);
  want.put("qualifier.logHist.3", 1L);
  want.put("qualifier.logHist.4", 1L);

  // visibility statistics and log2 histogram
  want.put("visibility.min", 2L);
  want.put("visibility.max", 17L);
  want.put("visibility.sum", 30L);
  want.put("visibility.logHist.1", 1L);
  want.put("visibility.logHist.3", 1L);
  want.put("visibility.logHist.4", 1L);

  // value statistics and log2 histogram
  want.put("value.min", 2L);
  want.put("value.max", 9L);
  want.put("value.sum", 17L);
  want.put("value.logHist.1", 1L);
  want.put("value.logHist.3", 2L);

  // number of entries accepted
  want.put("total", 3L);

  Assert.assertEquals(want, actual);
}
Use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in the Apache Accumulo project.
Class SummaryIT, method basicSummaryTest.
/**
 * Creates a table with {@code BasicSummarizer} enabled and checks the retrieved summaries after
 * each of: an unflushed write, a flush, an extra put plus delete, a full compaction, a split, a
 * partial compaction, a range-restricted retrieval, and removing then re-adding the summarizer.
 *
 * @throws Exception if any table operation or helper call fails
 */
@Test
public void basicSummaryTest() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();
  NewTableConfiguration ntc = new NewTableConfiguration();
  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(BasicSummarizer.class.getName()).build();
  ntc.enableSummarization(sc1);
  c.tableOperations().create(table, ntc);

  // Declared outside the try block because both are reused after the writer is closed.
  Collection<Summary> summaries;
  LongSummaryStatistics stats;

  // try-with-resources closes the writer even when a call or assertion below throws; on the
  // success path it is closed at the same point as before (right before the full compaction).
  try (BatchWriter bw = writeData(table, c)) {
    // retrieving without a flush is expected to yield no summaries
    summaries = c.tableOperations().summaries(table).flush(false).retrieve();
    Assert.assertEquals(0, summaries.size());

    stats = getTimestampStats(table, c);
    summaries = c.tableOperations().summaries(table).flush(true).retrieve();
    checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

    // write one more put and one delete, then flush the table
    Mutation m = new Mutation(String.format("r%09x", 999));
    m.put("f1", "q1", "999-0");
    m.putDelete("f1", "q2");
    bw.addMutation(m);
    bw.flush();
    c.tableOperations().flush(table, null, null, true);

    stats = getTimestampStats(table, c);
    summaries = c.tableOperations().summaries(table).retrieve();
    checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_002L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 1L);
  }

  // compact the whole table and wait for it to finish
  c.tableOperations().compact(table, new CompactionConfig().setWait(true));
  summaries = c.tableOperations().summaries(table).retrieve();
  checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

  // split tablet into two
  String sp1 = String.format("r%09x", 50_000);
  addSplits(table, c, sp1);
  summaries = c.tableOperations().summaries(table).retrieve();
  checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

  // compact 2nd tablet
  c.tableOperations().compact(table, new CompactionConfig().setStartRow(new Text(sp1)).setWait(true));
  summaries = c.tableOperations().summaries(table).retrieve();
  checkSummaries(summaries, sc1, 2, 0, 1, TOTAL_STAT, 113_999L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

  // get summaries for the 2nd tablet only (same start row as the compaction above)
  stats = getTimestampStats(table, c, sp1, null);
  summaries = c.tableOperations().summaries(table).startRow(sp1).retrieve();
  checkSummaries(summaries, sc1, 1, 0, 0, TOTAL_STAT, 49_999L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

  // compact all tablets and regenerate all summaries
  c.tableOperations().compact(table, new CompactionConfig());
  summaries = c.tableOperations().summaries(table).retrieve();
  stats = getTimestampStats(table, c);
  checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);

  // retrieve summaries for a row range that lies inside a single file; exactly one summary is
  // expected, and its total can only be bounded rather than pinned to an exact value
  summaries = c.tableOperations().summaries(table).startRow(String.format("r%09x", 75_000)).endRow(String.format("r%09x", 80_000)).retrieve();
  Summary summary = Iterables.getOnlyElement(summaries);
  Assert.assertEquals(1, summary.getFileStatistics().getTotal());
  Assert.assertEquals(1, summary.getFileStatistics().getExtra());
  long total = summary.getStatistics().get(TOTAL_STAT);
  Assert.assertTrue("Total " + total + " out of expected range", total > 0 && total <= 10_000);

  // test adding and removing
  // a predicate that matches nothing must leave the configured summarizer in place
  c.tableOperations().removeSummarizers(table, sc -> sc.getClassName().contains("foo"));
  List<SummarizerConfiguration> summarizers = c.tableOperations().listSummarizers(table);
  Assert.assertEquals(1, summarizers.size());
  Assert.assertTrue(summarizers.contains(sc1));

  c.tableOperations().removeSummarizers(table, sc -> sc.getClassName().equals(BasicSummarizer.class.getName()));
  summarizers = c.tableOperations().listSummarizers(table);
  Assert.assertEquals(0, summarizers.size());

  // with no summarizer configured, a compaction should leave no summaries behind
  c.tableOperations().compact(table, new CompactionConfig().setWait(true));
  summaries = c.tableOperations().summaries(table).retrieve();
  Assert.assertEquals(0, summaries.size());

  // re-adding the summarizer and compacting should regenerate the same summaries
  c.tableOperations().addSummarizers(table, sc1);
  c.tableOperations().compact(table, new CompactionConfig().setWait(true));
  summaries = c.tableOperations().summaries(table).retrieve();
  checkSummaries(summaries, sc1, 2, 0, 0, TOTAL_STAT, 100_000L, MIN_TIMESTAMP_STAT, stats.getMin(), MAX_TIMESTAMP_STAT, stats.getMax(), DELETES_STAT, 0L);
}
Use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in the Apache Accumulo project.
Class SummaryIT, method tooLargeTest.
/**
 * Creates a table with {@code BigSummarizer} enabled, writes two rows, and checks the file
 * statistics and summary statistics first after a flush and then after a split plus compaction.
 *
 * @throws Exception if any table operation or helper call fails
 */
@Test
public void tooLargeTest() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector conn = getConnector();

  SummarizerConfiguration bigConfig = SummarizerConfiguration.builder(BigSummarizer.class).build();
  NewTableConfiguration tableConfig = new NewTableConfiguration();
  tableConfig.enableSummarization(bigConfig);
  conn.tableOperations().create(table, tableConfig);

  try (BatchWriter writer = conn.createBatchWriter(table, new BatchWriterConfig())) {
    write(writer, "a_large", "f1", "q1", "v1");
    write(writer, "v_small", "f1", "q1", "v2");
  }
  conn.tableOperations().flush(table, null, null, true);

  // single flushed file: it is expected to be reported as large/inaccurate with no statistics
  Summary result = conn.tableOperations().summaries(table).retrieve().get(0);
  Assert.assertEquals(1, result.getFileStatistics().getLarge());
  Assert.assertEquals(0, result.getFileStatistics().getMissing());
  Assert.assertEquals(0, result.getFileStatistics().getExtra());
  Assert.assertEquals(0, result.getFileStatistics().getDeleted());
  Assert.assertEquals(1, result.getFileStatistics().getInaccurate());
  Assert.assertEquals(1, result.getFileStatistics().getTotal());
  Assert.assertEquals(Collections.emptyMap(), result.getStatistics());

  // create situation where one tablet has summary data and one does not because the summary data was too large
  TreeSet<Text> splitPoints = new TreeSet<>(Collections.singleton(new Text("m")));
  conn.tableOperations().addSplits(table, splitPoints);
  conn.tableOperations().compact(table, new CompactionConfig().setWait(true));

  result = conn.tableOperations().summaries(table).retrieve().get(0);
  Assert.assertEquals(1, result.getFileStatistics().getLarge());
  Assert.assertEquals(0, result.getFileStatistics().getMissing());
  Assert.assertEquals(0, result.getFileStatistics().getExtra());
  Assert.assertEquals(0, result.getFileStatistics().getDeleted());
  Assert.assertEquals(1, result.getFileStatistics().getInaccurate());
  Assert.assertEquals(2, result.getFileStatistics().getTotal());

  // expected statistics: ten keys "%09x" of i, each mapped to i * 19
  HashMap<String, Long> want = new HashMap<>();
  for (int i = 0; i < 10; i++) {
    String statName = String.format("%09x", i);
    want.put(statName, (long) i * 19);
  }
  Assert.assertEquals(want, result.getStatistics());
}
Aggregations