use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.
the class RFileTest method testSampleLG.
/**
 * Writes an RFile holding one named locality group ("meta-lg") followed by the default locality
 * group with row sampling configured, and checks that the sample read back from the file equals
 * the entries whose first key per row was accepted by a {@link Sampler} built from the same
 * configuration. The scenario is repeated for two sample buffer sizes and three sampler moduli.
 *
 * @throws IOException if writing or reading the test RFile fails
 */
@Test
public void testSampleLG() throws IOException {
  final int rowCount = 5000;
  final int[] moduli = { 19, 103, 1019 };

  for (int bufferSize : new int[] { 1 << 10, 1 << 20 }) {
    // force sample buffer to flush for smaller data
    RFile.setSampleBufferSize(bufferSize);

    for (int modulus : moduli) {
      List<Entry<Key, Value>> metaSample = new ArrayList<>();
      List<Entry<Key, Value>> dataSample = new ArrayList<>();

      // Sampler settings layered over the test's configuration (or the defaults when none is set).
      ConfigurationCopy samplerProps = new ConfigurationCopy(conf == null ? DefaultConfiguration.getInstance() : conf);
      samplerProps.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
      samplerProps.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
      samplerProps.set(Property.TABLE_SAMPLER_OPTS + "modulus", String.valueOf(modulus));
      Sampler rowSampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(samplerProps), samplerProps);

      TestRFile rfile = new TestRFile(samplerProps);
      rfile.openWriter(false, 1000);

      // Named locality group: three entries per row across the metaA and metaB families.
      rfile.writer.startNewLocalityGroup("meta-lg", newColFamByteSequence("metaA", "metaB"));
      for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        String row = String.format("r%06d", rowNum);
        Key metaAKey = new Key(row, "metaA", "q9", 7);
        Key metaBKey1 = new Key(row, "metaB", "q8", 7);
        Key metaBKey2 = new Key(row, "metaB", "qA", 7);
        Value metaAVal = new Value(String.valueOf(rowNum).getBytes());
        Value metaBVal1 = new Value(String.valueOf(rowNum * 93).getBytes());
        Value metaBVal2 = new Value(String.valueOf(rowNum * 113).getBytes());

        // The decision made for the first key of the row is applied to all three entries.
        boolean rowSampled = rowSampler.accept(metaAKey);
        if (rowSampled) {
          metaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaAKey, metaAVal));
          metaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaBKey1, metaBVal1));
          metaSample.add(new AbstractMap.SimpleImmutableEntry<>(metaBKey2, metaBVal2));
        }

        rfile.writer.append(metaAKey, metaAVal);
        rfile.writer.append(metaBKey1, metaBVal1);
        rfile.writer.append(metaBKey2, metaBVal2);
      }

      // Default locality group: a single dataA entry per row.
      rfile.writer.startDefaultLocalityGroup();
      for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        String row = String.format("r%06d", rowNum);
        Key dataKey = new Key(row, "dataA", "q9", 7);
        Value dataVal = new Value(String.valueOf(rowNum).getBytes());

        boolean rowSampled = rowSampler.accept(dataKey);
        if (rowSampled) {
          dataSample.add(new AbstractMap.SimpleImmutableEntry<>(dataKey, dataVal));
        }

        rfile.writer.append(dataKey, dataVal);
      }
      rfile.closeWriter();

      // Both groups must have contributed something, otherwise the checks below prove nothing.
      Assert.assertTrue(metaSample.size() > 0);
      Assert.assertTrue(dataSample.size() > 0);

      rfile.openReader(false);
      FileSKVIterator sampleIter = rfile.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(samplerProps));

      // Column family selections expected to yield only the "meta-lg" sample.
      checkSample(sampleIter, metaSample, newColFamByteSequence("metaA", "metaB"), true);
      checkSample(sampleIter, metaSample, newColFamByteSequence("metaA"), true);
      checkSample(sampleIter, metaSample, newColFamByteSequence("metaB"), true);
      checkSample(sampleIter, metaSample, newColFamByteSequence("dataA"), false);

      // Column family selections expected to yield only the default group's sample.
      checkSample(sampleIter, dataSample, newColFamByteSequence("metaA", "metaB"), false);
      checkSample(sampleIter, dataSample, newColFamByteSequence("dataA"), true);

      // Selections expected to yield the sample of both groups, merged in key order.
      List<Entry<Key, Value>> combinedSample = new ArrayList<>(metaSample);
      combinedSample.addAll(dataSample);
      Comparator<Entry<Key, Value>> byKey = new Comparator<Entry<Key, Value>>() {
        @Override
        public int compare(Entry<Key, Value> left, Entry<Key, Value> right) {
          return left.getKey().compareTo(right.getKey());
        }
      };
      Collections.sort(combinedSample, byKey);

      checkSample(sampleIter, combinedSample, newColFamByteSequence("dataA", "metaA"), true);
      checkSample(sampleIter, combinedSample, EMPTY_COL_FAMS, false);

      rfile.closeReader();
    }
  }
}
use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.
the class InMemoryMapTest method runInterruptSampleTest.
/**
 * Reads the sample of an {@link InMemoryMap} through an iterator that carries an interrupt flag,
 * first with the flag cleared (the full expected sample must be returned) and then with the flag
 * set (the read must fail with {@link IterationInterruptedException}).
 *
 * @param deepCopy when true, the reads go through a deep copy of the memory iterator
 * @param delete when true, {@code imm.delete(0)} is invoked before reading
 * @param dcAfterDelete selects whether the delete call is placed after ({@code true}) or before
 *        ({@code false}) the deep-copy step in this method; only relevant when {@code delete} is true
 */
private void runInterruptSampleTest(boolean deepCopy, boolean delete, boolean dcAfterDelete) throws Exception {
  SamplerConfigurationImpl samplerConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
  Sampler rowSampler = SamplerFactory.newSampler(samplerConfig, DefaultConfiguration.getInstance());

  // Table configuration carrying the sampler properties.
  ConfigurationCopy tableConf = newConfig(tempFolder.newFolder().getAbsolutePath());
  for (Entry<String, String> prop : samplerConfig.toTablePropertiesMap().entrySet()) {
    tableConf.set(prop.getKey(), prop.getValue());
  }

  InMemoryMap memMap = new InMemoryMap(tableConf);
  TreeMap<Key, Value> sampledEntries = new TreeMap<>();
  TreeMap<Key, Value> allEntries = new TreeMap<>();

  for (int rowNum = 0; rowNum < 1000; rowNum++) {
    String row = String.format("r%06d", rowNum);
    int evenValue = 2 * rowNum;
    int oddValue = evenValue + 1;
    mutate(memMap, row, "cf1:cq1", 5, "v" + evenValue, rowSampler, sampledEntries, allEntries);
    mutate(memMap, row, "cf2:cq2", 5, "v" + oddValue, rowSampler, sampledEntries, allEntries);
  }
  assertTrue(sampledEntries.size() > 0);

  MemoryIterator memIter = memMap.skvIterator(samplerConfig);
  AtomicBoolean interrupted = new AtomicBoolean(false);
  memIter.setInterruptFlag(interrupted);
  SortedKeyValueIterator<Key, Value> reader = memIter;

  // Position of the delete call relative to the deep-copy step below.
  boolean deleteAheadOfCopyStep = delete && !dcAfterDelete;
  boolean deleteBehindCopyStep = delete && dcAfterDelete;

  if (deleteAheadOfCopyStep) {
    memMap.delete(0);
  }
  if (deepCopy) {
    reader = reader.deepCopy(new SampleIE(samplerConfig));
  }
  if (deleteBehindCopyStep) {
    memMap.delete(0);
  }

  // With the flag cleared the whole sample is readable.
  assertEquals(sampledEntries, readAll(reader));

  // With the flag raised the same read must be aborted.
  interrupted.set(true);
  try {
    readAll(reader);
    Assert.fail();
  } catch (IterationInterruptedException expected) {
    // this exception is the outcome the test is looking for
  }

  memIter.close();
}
use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.
the class InMemoryMapTest method testDeferredSamplerCreation.
/**
 * Changes the sampler properties of a configuration after an {@link InMemoryMap} has already been
 * created from it, then checks that the map serves a sample for the updated sampler configuration,
 * serves all data when no sampler configuration is given, and raises
 * {@link SampleNotPresentException} when sought with the configuration that was replaced.
 */
@Test
public void testDeferredSamplerCreation() throws Exception {
  SamplerConfigurationImpl initialSamplerConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
  ConfigurationCopy tableConf = newConfig(tempFolder.newFolder().getAbsolutePath());
  for (Entry<String, String> prop : initialSamplerConfig.toTablePropertiesMap().entrySet()) {
    tableConf.set(prop.getKey(), prop.getValue());
  }

  InMemoryMap memMap = new InMemoryMap(tableConf);

  // change sampler config after creating in mem map.
  SamplerConfigurationImpl updatedSamplerConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
  for (Entry<String, String> prop : updatedSamplerConfig.toTablePropertiesMap().entrySet()) {
    tableConf.set(prop.getKey(), prop.getValue());
  }

  TreeMap<Key, Value> sampledEntries = new TreeMap<>();
  TreeMap<Key, Value> allEntries = new TreeMap<>();
  Sampler rowSampler = SamplerFactory.newSampler(updatedSamplerConfig, tableConf);
  for (int rowNum = 0; rowNum < 100; rowNum++) {
    mutate(memMap, "r" + rowNum, "cf:cq", 5, "v" + rowNum, rowSampler, sampledEntries, allEntries);
  }

  // Sample read using the updated configuration.
  MemoryIterator sampleIter = memMap.skvIterator(updatedSamplerConfig);
  sampleIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
  Assert.assertEquals(sampledEntries, readAll(sampleIter));

  // A deep copy of that iterator must return the same sample.
  SortedKeyValueIterator<Key, Value> sampleCopy = sampleIter.deepCopy(new SampleIE(updatedSamplerConfig));
  sampleCopy.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
  Assert.assertEquals(sampledEntries, readAll(sampleCopy));

  // No sampler configuration: everything that was written is returned.
  MemoryIterator fullIter = memMap.skvIterator(null);
  fullIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
  Assert.assertEquals(allEntries, readAll(fullIter));

  // Seeking with the replaced configuration is expected to fail.
  MemoryIterator staleIter = memMap.skvIterator(initialSamplerConfig);
  thrown.expect(SampleNotPresentException.class);
  staleIter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
}
use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.
the class SamplerFactory method newSampler.
/**
 * Loads, instantiates and initializes the {@link Sampler} implementation named by {@code config}.
 *
 * <p>
 * Exactly one class loader is used, chosen in this order:
 * <ol>
 * <li>when {@code useAccumuloStart} is false, the class loader that loaded {@code SamplerFactory};</li>
 * <li>otherwise, when {@link Property#TABLE_CLASSPATH} is set to a non-empty context name, the
 * {@code AccumuloVFSClassLoader} context manager for that context;</li>
 * <li>otherwise, {@code AccumuloVFSClassLoader} without a context.</li>
 * </ol>
 *
 * @param config sampler configuration supplying the class name and the options passed to
 *        {@link Sampler#init}
 * @param acuconf configuration from which the classpath context is read
 * @param useAccumuloStart whether the {@code AccumuloVFSClassLoader} may be used to load the class
 * @return a new sampler that has been initialized with {@code config}
 * @throws IOException declared for the class loading calls made here
 * @throws RuntimeException wrapping the cause if the class cannot be found, instantiated or accessed
 */
public static Sampler newSampler(SamplerConfigurationImpl config, AccumuloConfiguration acuconf, boolean useAccumuloStart) throws IOException {
  String context = acuconf.get(Property.TABLE_CLASSPATH);
  Class<? extends Sampler> clazz;
  try {
    // The three branches are mutually exclusive. Previously the first branch was a stand-alone
    // "if", so its result was always overwritten by one of the AccumuloVFSClassLoader branches
    // and useAccumuloStart == false had no effect.
    if (!useAccumuloStart) {
      clazz = SamplerFactory.class.getClassLoader().loadClass(config.getClassName()).asSubclass(Sampler.class);
    } else if (context != null && !context.equals("")) {
      clazz = AccumuloVFSClassLoader.getContextManager().loadClass(context, config.getClassName(), Sampler.class);
    } else {
      clazz = AccumuloVFSClassLoader.loadClass(config.getClassName(), Sampler.class);
    }

    Sampler sampler = clazz.newInstance();
    sampler.init(config.toSamplerConfiguration());
    return sampler;
  } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.accumulo.core.client.sample.Sampler in project accumulo by apache.
the class InMemoryMapTest method testSample.
/**
 * Checks what sample-enabled, non-sample and deep-copied iterators over an InMemoryMap return,
 * both before and after imm.delete(0) is called. The scenario runs twice: once with a
 * configuration that only carries the sampler properties, and once with a configuration that
 * additionally sets locality group properties for a group "lg1" built from column family "cf2".
 */
@Test
public void testSample() throws Exception {
SamplerConfigurationImpl sampleConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
// Sampler built from the same configuration; it is handed to mutate(...) together with the expected maps.
Sampler sampler = SamplerFactory.newSampler(sampleConfig, DefaultConfiguration.getInstance());
// config1: sampler properties only.
ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
for (Entry<String, String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
config1.set(entry.getKey(), entry.getValue());
}
// config2: locality group "lg1" (column family cf2) plus the same sampler properties.
ConfigurationCopy config2 = newConfig(tempFolder.newFolder().getAbsolutePath());
config2.set(Property.TABLE_LOCALITY_GROUP_PREFIX + "lg1", LocalityGroupUtil.encodeColumnFamilies(toTextSet("cf2")));
config2.set(Property.TABLE_LOCALITY_GROUPS.getKey(), "lg1");
for (Entry<String, String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
config2.set(entry.getKey(), entry.getValue());
}
for (ConfigurationCopy config : Arrays.asList(config1, config2)) {
InMemoryMap imm = new InMemoryMap(config);
TreeMap<Key, Value> expectedSample = new TreeMap<>();
TreeMap<Key, Value> expectedAll = new TreeMap<>();
// Never populated in this method, so it stays empty; used as the "no data" expectation.
TreeMap<Key, Value> expectedNone = new TreeMap<>();
// iter0 is obtained before any mutation is applied to the map.
MemoryIterator iter0 = imm.skvIterator(sampleConfig);
for (int r = 0; r < 100; r++) {
String row = String.format("r%06d", r);
mutate(imm, row, "cf1:cq1", 5, "v" + (2 * r), sampler, expectedSample, expectedAll);
mutate(imm, row, "cf2:cq2", 5, "v" + ((2 * r) + 1), sampler, expectedSample, expectedAll);
}
// Guard against a vacuous test: the expected sample must contain something.
assertTrue(expectedSample.size() > 0);
// iter1 (sample) and iter2 (no sampler config) are obtained after the mutations.
MemoryIterator iter1 = imm.skvIterator(sampleConfig);
MemoryIterator iter2 = imm.skvIterator(null);
// Deep copies of each iterator: "dc1" uses a SampleIE without a sampler config, "dc2" uses one with sampleConfig.
SortedKeyValueIterator<Key, Value> iter0dc1 = iter0.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter0dc2 = iter0.deepCopy(new SampleIE(sampleConfig));
SortedKeyValueIterator<Key, Value> iter1dc1 = iter1.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter1dc2 = iter1.deepCopy(new SampleIE(sampleConfig));
SortedKeyValueIterator<Key, Value> iter2dc1 = iter2.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter2dc2 = iter2.deepCopy(new SampleIE(sampleConfig));
// iter0 and its copies are expected to see nothing.
assertEquals(expectedNone, readAll(iter0));
assertEquals(expectedNone, readAll(iter0dc1));
assertEquals(expectedNone, readAll(iter0dc2));
// For iter1/iter2, the expectation follows the SampleIE used for the copy, not the source iterator.
assertEquals(expectedSample, readAll(iter1));
assertEquals(expectedAll, readAll(iter2));
assertEquals(expectedAll, readAll(iter1dc1));
assertEquals(expectedAll, readAll(iter2dc1));
assertEquals(expectedSample, readAll(iter1dc2));
assertEquals(expectedSample, readAll(iter2dc2));
// After delete(0) every existing iterator must still return exactly what it returned before.
imm.delete(0);
assertEquals(expectedNone, readAll(iter0));
assertEquals(expectedNone, readAll(iter0dc1));
assertEquals(expectedNone, readAll(iter0dc2));
assertEquals(expectedSample, readAll(iter1));
assertEquals(expectedAll, readAll(iter2));
assertEquals(expectedAll, readAll(iter1dc1));
assertEquals(expectedAll, readAll(iter2dc1));
assertEquals(expectedSample, readAll(iter1dc2));
assertEquals(expectedSample, readAll(iter2dc2));
// Deep copies created after delete(0) must behave like the ones created before it.
SortedKeyValueIterator<Key, Value> iter0dc3 = iter0.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter0dc4 = iter0.deepCopy(new SampleIE(sampleConfig));
SortedKeyValueIterator<Key, Value> iter1dc3 = iter1.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter1dc4 = iter1.deepCopy(new SampleIE(sampleConfig));
SortedKeyValueIterator<Key, Value> iter2dc3 = iter2.deepCopy(new SampleIE());
SortedKeyValueIterator<Key, Value> iter2dc4 = iter2.deepCopy(new SampleIE(sampleConfig));
assertEquals(expectedNone, readAll(iter0dc3));
assertEquals(expectedNone, readAll(iter0dc4));
assertEquals(expectedAll, readAll(iter1dc3));
assertEquals(expectedAll, readAll(iter2dc3));
assertEquals(expectedSample, readAll(iter1dc4));
assertEquals(expectedSample, readAll(iter2dc4));
// NOTE(review): iter0 is not closed here, unlike iter1 and iter2 — confirm whether that is intentional.
iter1.close();
iter2.close();
}
}
Aggregations