Use of org.apache.accumulo.core.client.sample.RowSampler in the Apache Accumulo project.
Class RFileTest, method testSampling.
/**
 * Writes test data to an RFile with a {@link RowSampler} configured, then verifies two things
 * about a scanner over that file:
 * <ul>
 * <li>with the sampler configuration set, the scanner returns exactly the entries whose keys a
 * locally initialized {@code RowSampler} accepts, and that set is strictly smaller than the full
 * data set;</li>
 * <li>after the sampler configuration is cleared, the scanner returns the full data set.</li>
 * </ul>
 *
 * The writer and the scanner are both closed even when an assertion or I/O call fails, so a
 * failing run does not leave the temporary file's handles open.
 *
 * @throws Exception if creating, writing, or reading the test file fails
 */
@Test
public void testSampling() throws Exception {
  SortedMap<Key, Value> testData1 = createTestData(1000, 2, 1);

  LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
  String testFile = createTmpTestFile();

  SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class)
      .setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "19"));

  // try-with-resources guarantees the file is finalized/closed even if append() throws.
  try (RFileWriter writer = RFile.newWriter().to(testFile).withFileSystem(localFs).withSampler(sc).build()) {
    writer.append(testData1.entrySet());
  }

  Scanner scanner = RFile.newScanner().from(testFile).withFileSystem(localFs).build();
  try {
    scanner.setSamplerConfiguration(sc);

    // Compute the expected sample independently by applying the same sampler to the input data.
    RowSampler rowSampler = new RowSampler();
    rowSampler.init(sc);

    SortedMap<Key, Value> sampleData = new TreeMap<>();
    for (Entry<Key, Value> e : testData1.entrySet()) {
      if (rowSampler.accept(e.getKey())) {
        sampleData.put(e.getKey(), e.getValue());
      }
    }

    // The sample must be a proper reduction, otherwise the comparison below proves nothing.
    Assert.assertTrue(sampleData.size() < testData1.size());
    Assert.assertEquals(sampleData, toMap(scanner));

    // Without a sampler configuration the scanner must see everything that was written.
    scanner.clearSamplerConfiguration();
    Assert.assertEquals(testData1, toMap(scanner));
  } finally {
    // try/finally rather than try-with-resources: close() is available on the scanner whether or
    // not the Scanner type in this Accumulo version implements AutoCloseable.
    scanner.close();
  }
}
Use of org.apache.accumulo.core.client.sample.RowSampler in the Apache Accumulo project.
Class SampleIT, method writeData.
/**
 * Writes 10000 rows named {@code r_000000} .. {@code r_009999} through the given batch writer.
 * Each row gets two columns, {@code cf1:cq1} (value {@code i}) and {@code cf1:cq2} (value
 * {@code 100000000 - i}), both at timestamp 7. Every key that a {@link RowSampler} initialized
 * from {@code sc} accepts is recorded in {@code expected} together with its value. The writer is
 * flushed before returning.
 *
 * @param bw batch writer that receives the mutations
 * @param sc sampler configuration used to initialize the local sampler
 * @param expected map that is filled with the accepted keys and their values
 * @return the row of the fifth accepted {@code cq1} key, or {@code null} if fewer than five were
 *         accepted
 * @throws MutationsRejectedException propagated from the batch writer
 */
private String writeData(BatchWriter bw, SamplerConfiguration sc, TreeMap<Key, Value> expected) throws MutationsRejectedException {
  RowSampler rowSampler = new RowSampler();
  rowSampler.init(sc);

  String fifthSampledRow = null;
  int acceptedCq1 = 0;

  for (int n = 0; n < 10000; n++) {
    String rowId = String.format("r_%06d", n);
    String cq1Value = String.valueOf(n);
    String cq2Value = String.valueOf(100000000 - n);

    Mutation mutation = new Mutation(rowId);
    mutation.put("cf1", "cq1", 7, cq1Value);
    mutation.put("cf1", "cq2", 7, cq2Value);
    bw.addMutation(mutation);

    Key cq1Key = new Key(rowId, "cf1", "cq1", 7);
    if (rowSampler.accept(cq1Key)) {
      expected.put(cq1Key, new Value(cq1Value.getBytes()));
      acceptedCq1++;
      // Remember the row at which the fifth cq1 key was accepted; it is the return value.
      if (acceptedCq1 == 5) {
        fifthSampledRow = rowId;
      }
    }

    Key cq2Key = new Key(rowId, "cf1", "cq2", 7);
    if (rowSampler.accept(cq2Key)) {
      expected.put(cq2Key, new Value(cq2Value.getBytes()));
    }
  }

  bw.flush();
  return fifthSampledRow;
}
Use of org.apache.accumulo.core.client.sample.RowSampler in the Apache Accumulo project.
Class SampleIT, method updateExpected.
/**
 * Rebuilds {@code expected} from scratch for the given sampler configuration. The map is cleared,
 * then for rows {@code r_000000} .. {@code r_009999} the keys {@code cf1:cq1} (value {@code i})
 * and {@code cf1:cq2} (value {@code 100000000 - i}), both at timestamp 7, are added whenever a
 * {@link RowSampler} initialized from {@code sc} accepts them.
 *
 * @param sc sampler configuration used to initialize the local sampler
 * @param expected map that is emptied and refilled with the accepted keys and their values
 */
private void updateExpected(SamplerConfiguration sc, TreeMap<Key, Value> expected) {
  expected.clear();

  RowSampler rowSampler = new RowSampler();
  rowSampler.init(sc);

  for (int n = 0; n < 10000; n++) {
    String rowId = String.format("r_%06d", n);

    Key cq1Key = new Key(rowId, "cf1", "cq1", 7);
    if (rowSampler.accept(cq1Key)) {
      byte[] cq1Bytes = String.valueOf(n).getBytes();
      expected.put(cq1Key, new Value(cq1Bytes));
    }

    Key cq2Key = new Key(rowId, "cf1", "cq2", 7);
    if (rowSampler.accept(cq2Key)) {
      byte[] cq2Bytes = String.valueOf(100000000 - n).getBytes();
      expected.put(cq2Key, new Value(cq2Bytes));
    }
  }
}
Aggregations