use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.
the class SummaryIT method testLocalityGroups.
/**
 * Checks summary data for a table whose column families are spread across locality groups.
 *
 * <p>The table is created with a {@code FamilySummarizer} and a {@code BasicSummarizer}
 * configuration, two locality groups are set, eleven entries are written, and summaries are
 * retrieved with a flush. The statistics and file statistics of each summary are asserted, and a
 * full scan of the table is compared with the entries recorded while writing.
 *
 * @throws Exception if any table, write, summary or scan operation fails
 */
@Test
public void testLocalityGroups() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();

  NewTableConfiguration ntc = new NewTableConfiguration();
  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FamilySummarizer.class).build();
  SummarizerConfiguration sc2 = SummarizerConfiguration.builder(BasicSummarizer.class).build();
  ntc.enableSummarization(sc1, sc2);
  c.tableOperations().create(table, ntc);

  // Family names must match the families written below exactly; a padded name would never match
  // and would leave that family's data in the default locality group.
  Map<String, Set<Text>> lgroups = new HashMap<>();
  lgroups.put("lg1", ImmutableSet.of(new Text("chocolate"), new Text("coffee")));
  lgroups.put("lg2", ImmutableSet.of(new Text("broccoli"), new Text("cabbage")));
  c.tableOperations().setLocalityGroups(table, lgroups);

  // The write helper (defined elsewhere in this class) is handed this map along with each entry;
  // it is compared with a full scan at the end of the test.
  Map<Key, Value> expected = new HashMap<>();
  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, expected, "order:001", "chocolate", "dark", 3L, "99kg");
    write(bw, expected, "order:001", "chocolate", "light", 4L, "94kg");
    write(bw, expected, "order:001", "coffee", "dark", 5L, "33kg");
    write(bw, expected, "order:001", "broccoli", "crowns", 6L, "2kg");
    write(bw, expected, "order:001", "cheddar", "canadian", 7L, "40kg");
    write(bw, expected, "order:653", "chocolate", "dark", 3L, "3kg");
    write(bw, expected, "order:653", "chocolate", "light", 4L, "4kg");
    write(bw, expected, "order:653", "coffee", "dark", 5L, "2kg");
    write(bw, expected, "order:653", "broccoli", "crowns", 6L, "105kg");
    write(bw, expected, "order:653", "cabbage", "heads", 7L, "199kg");
    write(bw, expected, "order:653", "cheddar", "canadian", 8L, "43kg");
  }

  List<Summary> summaries = c.tableOperations().summaries(table).flush(true).retrieve();

  // Exactly one summary per enabled summarizer configuration is expected.
  Assert.assertEquals(2,
      summaries.stream().map(Summary::getSummarizerConfiguration).distinct().count());

  for (Summary summary : summaries) {
    if (summary.getSummarizerConfiguration().equals(sc1)) {
      Map<String, Long> expectedStats = nm("c:chocolate", 4L, "c:coffee", 2L, "c:broccoli", 2L,
          "c:cheddar", 2L, "c:cabbage", 1L, TOO_LONG_STAT, 0L, TOO_MANY_STAT, 0L, SEEN_STAT, 11L,
          EMITTED_STAT, 11L, DELETES_IGNORED_STAT, 0L);
      Assert.assertEquals(expectedStats, summary.getStatistics());
      Assert.assertEquals(0, summary.getFileStatistics().getInaccurate());
      Assert.assertEquals(1, summary.getFileStatistics().getTotal());
    } else if (summary.getSummarizerConfiguration().equals(sc2)) {
      Map<String, Long> expectedStats = nm(DELETES_STAT, 0L, TOTAL_STAT, 11L, MIN_TIMESTAMP_STAT,
          3L, MAX_TIMESTAMP_STAT, 8L);
      Assert.assertEquals(expectedStats, summary.getStatistics());
      Assert.assertEquals(0, summary.getFileStatistics().getInaccurate());
      Assert.assertEquals(1, summary.getFileStatistics().getTotal());
    } else {
      Assert.fail("unexpected summary config " + summary.getSummarizerConfiguration());
    }
  }

  // Close the scanner once the table contents have been collected.
  Map<Key, Value> actual = new HashMap<>();
  try (Scanner scanner = c.createScanner(table, Authorizations.EMPTY)) {
    scanner.forEach(e -> actual.put(e.getKey(), e.getValue()));
  }
  Assert.assertEquals(expected, actual);
}
use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.
the class SummaryIT method testBuggySummarizer.
/**
 * Expects summary retrieval to fail with an {@code AccumuloServerException} when the table is
 * configured with {@code BuggySummarizer}.
 *
 * @throws Exception if table setup, writing or flushing fails
 */
@Test
public void testBuggySummarizer() throws Exception {
  final String tableName = getUniqueNames(1)[0];
  Connector conn = getConnector();

  SummarizerConfiguration buggyConfig =
      SummarizerConfiguration.builder(BuggySummarizer.class).build();
  NewTableConfiguration tableConfig = new NewTableConfiguration();
  tableConfig.enableSummarization(buggyConfig);
  conn.tableOperations().create(tableName, tableConfig);

  // add a single split so that summary stats merge is forced
  TreeSet<Text> splitPoints = new TreeSet<>();
  splitPoints.add(new Text("g"));
  conn.tableOperations().addSplits(tableName, splitPoints);

  try (BatchWriter writer = conn.createBatchWriter(tableName, new BatchWriterConfig())) {
    write(writer, "bar1", "f1", "q1", "v1");
    write(writer, "bar2", "f1", "q1", "v2");
    write(writer, "foo1", "f1", "q1", "v3");
  }
  conn.tableOperations().flush(tableName, null, null, true);

  // Only an AccumuloServerException counts as the expected outcome; anything else propagates.
  boolean sawServerFailure = false;
  try {
    conn.tableOperations().summaries(tableName).retrieve();
  } catch (AccumuloServerException expected) {
    sawServerFailure = true;
  }
  Assert.assertTrue("Expected server side failure and did not see it", sawServerFailure);
}
use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.
the class SummaryIT method selectionTest.
/**
 * Exercises the ways of selecting which summaries to retrieve: by exact configuration, by
 * several configurations, by no filter, by regular expression, and by a regular expression
 * combined with an exact configuration. Also checks that a malformed regular expression raises
 * {@code PatternSyntaxException}.
 *
 * @throws Exception if table setup, writing, flushing or summary retrieval fails
 */
@Test
public void selectionTest() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();

  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(BasicSummarizer.class).build();
  SummarizerConfiguration sc2 =
      SummarizerConfiguration.builder(KeySizeSummarizer.class).addOption("maxLen", "512").build();
  NewTableConfiguration ntc = new NewTableConfiguration();
  ntc.enableSummarization(sc1, sc2);
  c.tableOperations().create(table, ntc);

  writeData(table, c).close();
  c.tableOperations().flush(table, null, null, true);

  LongSummaryStatistics stats = getTimestampStats(table, c);
  final long minTs = stats.getMin();
  final long maxTs = stats.getMax();
  final long entryCount = 100_000L;

  final String basicRegex = ".*BasicSummarizer \\{\\}.*";
  final String keySize512Regex = ".*KeySizeSummarizer \\{maxLen=512\\}.*";
  final String keySize256Regex = ".*KeySizeSummarizer \\{maxLen=256\\}.*";

  // select each configured summarizer on its own
  Collection<Summary> summaries =
      c.tableOperations().summaries(table).withConfiguration(sc2).retrieve();
  Assert.assertEquals(1, summaries.size());
  checkSummary(summaries, sc2, "len=14", entryCount);

  summaries = c.tableOperations().summaries(table).withConfiguration(sc1).retrieve();
  Assert.assertEquals(1, summaries.size());
  checkSummary(summaries, sc1, TOTAL_STAT, entryCount, MIN_TIMESTAMP_STAT, minTs,
      MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);

  // retrieve a non-existent summary
  SummarizerConfiguration sc3 = SummarizerConfiguration.builder(KeySizeSummarizer.class.getName())
      .addOption("maxLen", "256").build();
  summaries = c.tableOperations().summaries(table).withConfiguration(sc3).retrieve();
  Assert.assertEquals(0, summaries.size());

  // select both configurations explicitly
  summaries = c.tableOperations().summaries(table).withConfiguration(sc1, sc2).retrieve();
  Assert.assertEquals(2, summaries.size());
  checkSummary(summaries, sc1, TOTAL_STAT, entryCount, MIN_TIMESTAMP_STAT, minTs,
      MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
  checkSummary(summaries, sc2, "len=14", entryCount);

  // no selection at all
  summaries = c.tableOperations().summaries(table).retrieve();
  Assert.assertEquals(2, summaries.size());
  checkSummary(summaries, sc1, TOTAL_STAT, entryCount, MIN_TIMESTAMP_STAT, minTs,
      MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
  checkSummary(summaries, sc2, "len=14", entryCount);

  // select by regular expression
  summaries = c.tableOperations().summaries(table).withMatchingConfiguration(basicRegex).retrieve();
  Assert.assertEquals(1, summaries.size());
  checkSummary(summaries, sc1, TOTAL_STAT, entryCount, MIN_TIMESTAMP_STAT, minTs,
      MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);

  summaries =
      c.tableOperations().summaries(table).withMatchingConfiguration(keySize512Regex).retrieve();
  Assert.assertEquals(1, summaries.size());
  checkSummary(summaries, sc2, "len=14", entryCount);

  summaries =
      c.tableOperations().summaries(table).withMatchingConfiguration(keySize256Regex).retrieve();
  Assert.assertEquals(0, summaries.size());

  // regular expression combined with an exact configuration
  summaries = c.tableOperations().summaries(table).withMatchingConfiguration(basicRegex)
      .withConfiguration(sc2).retrieve();
  Assert.assertEquals(2, summaries.size());
  checkSummary(summaries, sc1, TOTAL_STAT, entryCount, MIN_TIMESTAMP_STAT, minTs,
      MAX_TIMESTAMP_STAT, maxTs, DELETES_STAT, 0L);
  checkSummary(summaries, sc2, "len=14", entryCount);

  // Ensure a bad regex fails fast.
  try {
    c.tableOperations().summaries(table)
        .withMatchingConfiguration(".*KeySizeSummarizer {maxLen=256}.*").retrieve();
    Assert.fail("Bad regex should have caused exception");
  } catch (PatternSyntaxException expected) {
    // the unescaped brace makes the pattern invalid, which is what this test wants to see
  }
}
use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.
the class SummaryIT method testPermissions.
/**
 * Checks that retrieving summaries requires {@code TablePermission.GET_SUMMARIES}.
 *
 * <p>A newly created user is first expected to be denied with
 * {@code SecurityErrorCode.PERMISSION_DENIED}. After the permission is granted, retrieval is
 * retried up to ten times; the test fails if every attempt is still rejected, so it cannot pass
 * without the summary statistics having been checked.
 *
 * @throws Exception if table setup, user creation or an unexpected client operation fails
 */
@Test
public void testPermissions() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();

  NewTableConfiguration ntc = new NewTableConfiguration();
  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FooCounter.class).build();
  ntc.enableSummarization(sc1);
  c.tableOperations().create(table, ntc);

  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, "bar1", "f1", "q1", "v1");
    write(bw, "bar2", "f1", "q1", "v2");
    write(bw, "foo1", "f1", "q1", "v3");
  }
  c.tableOperations().flush(table, null, null, true);

  // Connect as a second user that has not been granted any table permission.
  PasswordToken passTok = new PasswordToken("letmesee");
  c.securityOperations().createLocalUser("user1", passTok);
  String instanceName = c.getInstance().getInstanceName();
  String zookeepers = c.getInstance().getZooKeepers();
  Connector c2 = new ZooKeeperInstance(instanceName, zookeepers).getConnector("user1", passTok);

  try {
    c2.tableOperations().summaries(table).retrieve();
    Assert.fail("Expected operation to fail because user does not have permssion to get summaries");
  } catch (AccumuloSecurityException ase) {
    Assert.assertEquals(SecurityErrorCode.PERMISSION_DENIED, ase.getSecurityErrorCode());
  }

  c.securityOperations().grantTablePermission("user1", table, TablePermission.GET_SUMMARIES);

  // Retry while access is still rejected (presumably the grant may not be visible immediately),
  // but remember whether the statistics were ever actually verified.
  final int maxTries = 10;
  boolean verified = false;
  AccumuloSecurityException lastDenial = null;
  int tries = 0;
  while (tries < maxTries) {
    try {
      Summary summary = c2.tableOperations().summaries(table).retrieve().get(0);
      Assert.assertEquals(2, summary.getStatistics().size());
      Assert.assertEquals(2L, (long) summary.getStatistics().getOrDefault("bars", 0L));
      Assert.assertEquals(1L, (long) summary.getStatistics().getOrDefault("foos", 0L));
      verified = true;
      break;
    } catch (AccumuloSecurityException ase) {
      lastDenial = ase;
      UtilWaitThread.sleep(500);
      tries++;
    }
  }

  if (!verified) {
    // Every attempt was rejected: fail instead of passing without having checked anything.
    throw new AssertionError(
        "user1 could not retrieve summaries after " + maxTries + " attempts", lastDenial);
  }
}
use of org.apache.accumulo.core.client.summary.SummarizerConfiguration in project accumulo by apache.
the class SummaryIT method compactionTest.
/**
 * Runs the same user compaction twice on a table summarized by {@code FooCounter} and checks
 * which row types remain after each run.
 *
 * <p>With one "foo" row and two "bar" rows the compaction is expected to leave the data
 * unchanged; after three more "foo" rows are written it is expected to remove every "foo" row.
 *
 * @throws Exception if table setup, writing, compaction or scanning fails
 */
@Test
public void compactionTest() throws Exception {
  final String table = getUniqueNames(1)[0];
  Connector c = getConnector();

  NewTableConfiguration ntc = new NewTableConfiguration();
  SummarizerConfiguration sc1 = SummarizerConfiguration.builder(FooCounter.class.getName()).build();
  ntc.enableSummarization(sc1);
  c.tableOperations().create(table, ntc);

  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, "bar1", "f1", "q1", "v1");
    write(bw, "bar2", "f1", "q1", "v2");
    write(bw, "foo1", "f1", "q1", "v3");
  }

  // Create a compaction config that will filter out foos if there are too many. Uses summary
  // data to know if there are too many foos.
  CompactionStrategyConfig csc = new CompactionStrategyConfig(FooCS.class.getName());
  List<IteratorSetting> iterators =
      Collections.singletonList(new IteratorSetting(100, FooFilter.class));
  CompactionConfig compactConfig = new CompactionConfig().setFlush(true)
      .setCompactionStrategy(csc).setIterators(iterators).setWait(true);

  // this compaction should make no changes because there are fewer foos than bars
  c.tableOperations().compact(table, compactConfig);

  Map<String, Long> counts = countRowTypes(c, table);
  Assert.assertEquals(1L, (long) counts.getOrDefault("foo", 0L));
  Assert.assertEquals(2L, (long) counts.getOrDefault("bar", 0L));
  Assert.assertEquals(2, counts.size());

  try (BatchWriter bw = c.createBatchWriter(table, new BatchWriterConfig())) {
    write(bw, "foo2", "f1", "q1", "v4");
    write(bw, "foo3", "f1", "q1", "v5");
    write(bw, "foo4", "f1", "q1", "v6");
  }

  // this compaction should remove all foos because there are more foos than bars
  c.tableOperations().compact(table, compactConfig);

  counts = countRowTypes(c, table);
  Assert.assertEquals(0L, (long) counts.getOrDefault("foo", 0L));
  Assert.assertEquals(2L, (long) counts.getOrDefault("bar", 0L));
  Assert.assertEquals(1, counts.size());
}

/**
 * Scans the whole table and counts entries per row type, where the row type is the row with all
 * digits removed (for example "foo12" becomes "foo").
 *
 * @param c connector used to create the scanner
 * @param table name of the table to scan
 * @return map from row type to the number of entries seen with that row type
 * @throws Exception if the scanner cannot be created or the scan fails
 */
private Map<String, Long> countRowTypes(Connector c, String table) throws Exception {
  try (Scanner scanner = c.createScanner(table, Authorizations.EMPTY)) {
    Stream<Entry<Key, Value>> entries = StreamSupport.stream(scanner.spliterator(), false);
    return entries
        // convert to row
        .map(e -> e.getKey().getRowData().toString())
        // strip numbers off row
        .map(row -> row.replaceAll("[0-9]+", ""))
        // count different row types
        .collect(groupingBy(identity(), counting()));
  }
}
Aggregations