Example use of org.apache.accumulo.core.client.summary.Summary in the Apache Accumulo project.
Class SummaryIT, method checkSummaries.
/**
 * Asserts that {@code summaries} holds a single {@link Summary} whose file statistics,
 * summarizer configuration and statistics map match the expected values.
 *
 * @param summaries collection from which the only element is taken via {@code Iterables.getOnlyElement}
 * @param sc summarizer configuration the summary is expected to report
 * @param total expected value of {@code getFileStatistics().getTotal()}
 * @param missing expected value of {@code getFileStatistics().getMissing()}
 * @param extra expected value of {@code getFileStatistics().getExtra()}
 * @param kvs flattened expected statistics: alternating {@code String} name and {@code Long} value
 */
private void checkSummaries(Collection<Summary> summaries, SummarizerConfiguration sc, int total, int missing, int extra, Object... kvs) {
  Summary only = Iterables.getOnlyElement(summaries);

  // File-level counters; the deleted count is always expected to be zero here.
  Assert.assertEquals("total wrong", total, only.getFileStatistics().getTotal());
  Assert.assertEquals("missing wrong", missing, only.getFileStatistics().getMissing());
  Assert.assertEquals("extra wrong", extra, only.getFileStatistics().getExtra());
  Assert.assertEquals("deleted wrong", 0, only.getFileStatistics().getDeleted());

  Assert.assertEquals(sc, only.getSummarizerConfiguration());

  // Consume the varargs two at a time: a statistic name followed by its expected count.
  Map<String, Long> wanted = new HashMap<>();
  int pos = 0;
  while (pos < kvs.length) {
    String statName = (String) kvs[pos++];
    Long statValue = (Long) kvs[pos++];
    wanted.put(statName, statValue);
  }

  Assert.assertEquals(wanted, only.getStatistics());
}
Example use of org.apache.accumulo.core.client.summary.Summary in the Apache Accumulo project.
Class SummaryIT, method compactionTest.
/**
 * Exercises a compaction configured with the {@code FooCS} strategy and the {@code FooFilter}
 * iterator against a table summarized by {@code FooCounter}.
 *
 * <p>The table is first loaded with two "bar" rows and one "foo" row; a compaction is expected
 * to leave all rows in place. Three more "foo" rows are then written and a second compaction is
 * expected to remove every "foo" row while keeping both "bar" rows.
 */
@Test
public void compactionTest() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();

  NewTableConfiguration ntc = new NewTableConfiguration();
  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FooCounter.class.getName()).build();
  ntc.enableSummarization(sc1);
  c.tableOperations().create(table, ntc);

  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, "bar1", "f1", "q1", "v1");
    write(bw, "bar2", "f1", "q1", "v2");
    write(bw, "foo1", "f1", "q1", "v3");
  }

  // Create a compaction config that will filter out foos if there are too many. Uses summary
  // data to know if there are too many foos.
  CompactionStrategyConfig csc = new CompactionStrategyConfig(FooCS.class.getName());
  List<IteratorSetting> iterators = Collections.singletonList(new IteratorSetting(100, FooFilter.class));
  CompactionConfig compactConfig = new CompactionConfig().setFlush(true).setCompactionStrategy(csc).setIterators(iterators).setWait(true);

  // This compaction should make no changes because there are fewer foos than bars.
  c.tableOperations().compact(table, compactConfig);

  Map<String, Long> counts = countRowTypes(c, table);
  Assert.assertEquals(1L, (long) counts.getOrDefault("foo", 0L));
  Assert.assertEquals(2L, (long) counts.getOrDefault("bar", 0L));
  Assert.assertEquals(2, counts.size());

  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, "foo2", "f1", "q1", "v4");
    write(bw, "foo3", "f1", "q1", "v5");
    write(bw, "foo4", "f1", "q1", "v6");
  }

  // This compaction should remove all foos because there are more foos than bars.
  c.tableOperations().compact(table, compactConfig);

  counts = countRowTypes(c, table);
  Assert.assertEquals(0L, (long) counts.getOrDefault("foo", 0L));
  Assert.assertEquals(2L, (long) counts.getOrDefault("bar", 0L));
  Assert.assertEquals(1, counts.size());
}

/**
 * Scans the whole table with empty authorizations and counts entries per row type, where the
 * row type is the row string with every run of digits removed (e.g. "foo12" becomes "foo").
 *
 * @param c connector used to create the scanner
 * @param table name of the table to scan
 * @return map from row type to the number of scanned entries having that type
 */
private static Map<String, Long> countRowTypes(Connector c, String table) throws Exception {
  try (Scanner scanner = c.createScanner(table, Authorizations.EMPTY)) {
    return StreamSupport.stream(scanner.spliterator(), false)
        // convert each entry to its row
        .map(e -> e.getKey().getRowData().toString())
        // strip numbers off the row
        .map(r -> r.replaceAll("[0-9]+", ""))
        // count the different row types
        .collect(groupingBy(identity(), counting()));
  }
}
Example use of org.apache.accumulo.core.client.summary.Summary in the Apache Accumulo project.
Class SummaryCollection, method getSummaries.
/**
 * Builds one {@link Summary} for every entry in {@code mergedSummaries}.
 *
 * <p>Each summary is constructed from the merged statistics, its summarizer configuration, and
 * file counts taken from this collection and from the merged entry. The fourth constructor
 * argument is computed as {@code (totalFiles - deletedFiles) - filesContaining}.
 * NOTE(review): presumably this is the number of non-deleted files lacking the summary; confirm
 * against the {@code Summary} constructor.
 *
 * @return a new list with one summary per summarizer configuration
 */
public List<Summary> getSummaries() {
  List<Summary> result = new ArrayList<>(mergedSummaries.size());
  mergedSummaries.forEach((config, merged) -> result.add(
      new Summary(merged.summary, config, totalFiles, (totalFiles - deletedFiles) - merged.filesContaining, merged.filesExceedingBoundry, merged.filesLarge, deletedFiles)));
  return result;
}
Aggregations