Example usage of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project: class BulkImporter, method findOverlappingTablets.
/**
 * Determines which tablets a bulk-import file overlaps.
 *
 * Repeatedly seeks the file's reader to a row, asks the locator which tablet
 * contains the first row found at or after that position, records the tablet,
 * and then advances past that tablet's end row. Stops when the file has no
 * more data or the located tablet extends past {@code endRow}.
 *
 * @param context  client context supplying the table configuration
 * @param vm       volume manager used to resolve the file's filesystem
 * @param locator  tablet locator consulted for each row found in the file
 * @param file     path of the file to inspect
 * @param startRow first row of interest, or null to start from the beginning
 * @param endRow   last row of interest, or null for no upper bound
 * @return tablet locations, in file order, that the file's keys fall into
 * @throws Exception if reading the file or locating a tablet fails
 */
public static List<TabletLocation> findOverlappingTablets(ClientContext context, VolumeManager vm, TabletLocator locator, Path file, Text startRow, Text endRow) throws Exception {
List<TabletLocation> result = new ArrayList<>();
// no column-family filtering: scan all families when seeking
Collection<ByteSequence> columnFamilies = Collections.emptyList();
String filename = file.toString();
// log.debug(filename + " finding overlapping tablets " + startRow + " -> " + endRow);
FileSystem fs = vm.getVolumeByPath(file).getFileSystem();
// try-with-resources ensures the file reader is closed even on error
try (FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, fs.getConf()).withTableConfiguration(context.getConfiguration()).seekToBeginning().build()) {
Text row = startRow;
if (row == null)
row = new Text();
while (true) {
// log.debug(filename + " Seeking to row " + row);
// seek to the first key at or after 'row' (start key exclusive = false)
reader.seek(new Range(row, null), columnFamilies, false);
if (!reader.hasTop()) {
// log.debug(filename + " not found");
break;
}
row = reader.getTopKey().getRow();
TabletLocation tabletLocation = locator.locateTablet(context, row, false, true);
// log.debug(filename + " found row " + row + " at location " + tabletLocation);
result.add(tabletLocation);
// continue from just past the located tablet's end row
row = tabletLocation.tablet_extent.getEndRow();
if (row != null && (endRow == null || row.compareTo(endRow) < 0)) {
row = new Text(row);
// byte0 is defined elsewhere in the class; presumably a single zero byte
// appended to form the smallest row strictly after the tablet's end row
// — TODO confirm against the field's declaration
row.append(byte0, 0, byte0.length);
} else
// null end row means the last tablet; otherwise we've passed endRow
break;
}
}
// log.debug(filename + " to be sent to " + result);
return result;
}
Example usage of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project: class MultiThreadedRFileTest, method checkIndex.
/**
 * Sanity-checks the reader's index iterator: the reader's first key must not
 * sort after the first index entry, index entries must be in non-decreasing
 * order, and the final index entry must equal the reader's last key.
 *
 * @param reader the rfile reader whose index is validated
 * @throws IOException if reading the index fails
 * @throws RuntimeException if any ordering invariant is violated
 */
private static void checkIndex(Reader reader) throws IOException {
    FileSKVIterator indexIter = reader.getIndex();
    if (indexIter.hasTop()) {
        // copy the key: getTopKey()'s result may be reused by the iterator
        Key lastKey = new Key(indexIter.getTopKey());
        if (reader.getFirstKey().compareTo(lastKey) > 0)
            throw new RuntimeException("First key out of order " + reader.getFirstKey() + " " + lastKey);
        indexIter.next();
        while (indexIter.hasTop()) {
            if (lastKey.compareTo(indexIter.getTopKey()) > 0)
                throw new RuntimeException("Index out of order " + lastKey + " " + indexIter.getTopKey());
            lastKey = new Key(indexIter.getTopKey());
            indexIter.next();
        }
        // the last index entry must point at the reader's last key
        if (!reader.getLastKey().equals(lastKey)) {
            throw new RuntimeException("Last key out of order " + reader.getLastKey() + " " + lastKey);
        }
    }
}
Example usage of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project: class RFileTest, method test3.
@Test
public void test3() throws IOException {
    // Tests an rfile with multiple rows, each having multiple columns.
    // Writes 4 rows x 4 cf x 4 cq x 4 cv x 4 timestamps, each key appended
    // twice (once as a delete, once as a normal entry), then exercises
    // seeking from many positions and verifies the index entry count.
    TestRFile trf = new TestRFile(conf);
    trf.openWriter();
    int val = 0;
    ArrayList<Key> expectedKeys = new ArrayList<>(10000);
    ArrayList<Value> expectedValues = new ArrayList<>(10000);
    for (int row = 0; row < 4; row++) {
        String rowS = formatString("r_", row);
        for (int cf = 0; cf < 4; cf++) {
            String cfS = formatString("cf_", cf);
            for (int cq = 0; cq < 4; cq++) {
                String cqS = formatString("cq_", cq);
                for (int cv = 'A'; cv < 'A' + 4; cv++) {
                    String cvS = "" + (char) cv;
                    for (int ts = 4; ts > 0; ts--) {
                        Key k = newKey(rowS, cfS, cqS, cvS, ts);
                        // check below ensures when all key sizes are same more than one index block is created
                        Assert.assertEquals(27, k.getSize());
                        k.setDeleted(true);
                        Value v = newValue("" + val);
                        trf.writer.append(k, v);
                        expectedKeys.add(k);
                        expectedValues.add(v);
                        // append the same key again, this time not deleted
                        k = newKey(rowS, cfS, cqS, cvS, ts);
                        Assert.assertEquals(27, k.getSize());
                        v = newValue("" + val);
                        trf.writer.append(k, v);
                        expectedKeys.add(k);
                        expectedValues.add(v);
                        val++;
                    }
                }
            }
        }
    }
    trf.closeWriter();
    trf.openReader();
    // seek before everything
    trf.iter.seek(new Range((Key) null, null), EMPTY_COL_FAMS, false);
    verify(trf, expectedKeys.iterator(), expectedValues.iterator());
    // seek to the middle
    int index = expectedKeys.size() / 2;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek the first key
    index = 0;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek to the last key
    index = expectedKeys.size() - 1;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek after everything; the sublists below are empty, so verify
    // confirms the iterator is exhausted
    index = expectedKeys.size();
    trf.seek(new Key(new Text("z")));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // test seeking to the current location
    index = expectedKeys.size() / 2;
    trf.seek(expectedKeys.get(index));
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    trf.iter.next();
    index++;
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    trf.seek(expectedKeys.get(index));
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    // test seeking to each location in the file
    index = 0;
    for (Key key : expectedKeys) {
        trf.seek(key);
        assertTrue(trf.iter.hasTop());
        assertEquals(key, trf.iter.getTopKey());
        assertEquals(expectedValues.get(index), trf.iter.getTopValue());
        index++;
    }
    // test seeking backwards to each key
    for (int i = expectedKeys.size() - 1; i >= 0; i--) {
        Key key = expectedKeys.get(i);
        trf.seek(key);
        assertTrue(trf.iter.hasTop());
        assertEquals(key, trf.iter.getTopKey());
        assertEquals(expectedValues.get(i), trf.iter.getTopValue());
    }
    assertEquals(expectedKeys.get(expectedKeys.size() - 1), trf.reader.getLastKey());
    // test seeking to random location and reading all data from that point
    // there was an off by one bug with this in the transient index
    Random rand = new Random();
    for (int i = 0; i < 12; i++) {
        index = rand.nextInt(expectedKeys.size());
        trf.seek(expectedKeys.get(index));
        for (; index < expectedKeys.size(); index++) {
            assertTrue(trf.iter.hasTop());
            assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
            assertEquals(expectedValues.get(index), trf.iter.getTopValue());
            trf.iter.next();
        }
    }
    // count the number of index entries
    FileSKVIterator iiter = trf.reader.getIndex();
    int count = 0;
    while (iiter.hasTop()) {
        count++;
        iiter.next();
    }
    Assert.assertEquals(20, count);
    trf.closeReader();
}
Example usage of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project: class RFileTest, method testBigKeys.
@Test
public void testBigKeys() throws IOException {
    // this test ensures that big keys do not end up index
    ArrayList<Key> generated = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
        generated.add(new Key(String.format("r%06d", i), "cf1", "cq1", 42));
    }
    // add a few keys with long rows
    for (int i = 0; i < 1000; i += 100) {
        char[] pad = new char[1000];
        Arrays.fill(pad, 'b');
        String longRow = String.format("r%06d", i) + new String(pad);
        generated.add(new Key(longRow, "cf1", "cq1", 42));
    }
    Collections.sort(generated);
    // write every key with a value derived from its hash code
    TestRFile trf = new TestRFile(conf);
    trf.openWriter();
    for (int i = 0; i < generated.size(); i++) {
        Key k = generated.get(i);
        trf.writer.append(k, new Value((k.hashCode() + "").getBytes()));
    }
    trf.writer.close();
    // every index entry must stay small, even though some rows are huge
    trf.openReader();
    FileSKVIterator indexIter = trf.reader.getIndex();
    while (indexIter.hasTop()) {
        Key k = indexIter.getTopKey();
        Assert.assertTrue(k + " " + k.getSize() + " >= 20", k.getSize() < 20);
        indexIter.next();
    }
    // seeking to each key in random order must still land exactly on it
    Collections.shuffle(generated);
    for (Key key : generated) {
        trf.reader.seek(new Range(key, null), EMPTY_COL_FAMS, false);
        Assert.assertTrue(trf.reader.hasTop());
        Assert.assertEquals(key, trf.reader.getTopKey());
        Assert.assertEquals(new Value((key.hashCode() + "").getBytes()), trf.reader.getTopValue());
    }
}
Example usage of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project: class RFileTest, method testSampleLG.
@Test
public void testSampleLG() throws IOException {
// Verifies that sampling works per locality group: data written to two
// locality groups is sampled with a row-modulus sampler, and the sample
// reader must return exactly the accepted entries for each group's
// column families. Runs across several buffer sizes and moduli.
int num = 5000;
for (int sampleBufferSize : new int[] { 1 << 10, 1 << 20 }) {
// force sample buffer to flush for smaller data
RFile.setSampleBufferSize(sampleBufferSize);
for (int modulus : new int[] { 19, 103, 1019 }) {
List<Entry<Key, Value>> sampleDataLG1 = new ArrayList<>();
List<Entry<Key, Value>> sampleDataLG2 = new ArrayList<>();
// configure a row sampler; entries accepted by it form the expected sample
ConfigurationCopy sampleConf = new ConfigurationCopy(conf == null ? DefaultConfiguration.getInstance() : conf);
sampleConf.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
sampleConf.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
sampleConf.set(Property.TABLE_SAMPLER_OPTS + "modulus", modulus + "");
Sampler sampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(sampleConf), sampleConf);
TestRFile trf = new TestRFile(sampleConf);
trf.openWriter(false, 1000);
// first locality group: column families metaA and metaB
trf.writer.startNewLocalityGroup("meta-lg", newColFamByteSequence("metaA", "metaB"));
for (int r = 0; r < num; r++) {
String row = String.format("r%06d", r);
Key k1 = new Key(row, "metaA", "q9", 7);
Key k2 = new Key(row, "metaB", "q8", 7);
Key k3 = new Key(row, "metaB", "qA", 7);
Value v1 = new Value(("" + r).getBytes());
Value v2 = new Value(("" + r * 93).getBytes());
Value v3 = new Value(("" + r * 113).getBytes());
// a row sampler accepts all keys in a row or none; checking k1 suffices
if (sampler.accept(k1)) {
sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k1, v1));
sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k2, v2));
sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k3, v3));
}
trf.writer.append(k1, v1);
trf.writer.append(k2, v2);
trf.writer.append(k3, v3);
}
// default locality group: everything else (family dataA)
trf.writer.startDefaultLocalityGroup();
for (int r = 0; r < num; r++) {
String row = String.format("r%06d", r);
Key k1 = new Key(row, "dataA", "q9", 7);
Value v1 = new Value(("" + r).getBytes());
if (sampler.accept(k1)) {
sampleDataLG2.add(new AbstractMap.SimpleImmutableEntry<>(k1, v1));
}
trf.writer.append(k1, v1);
}
trf.closeWriter();
// the moduli are chosen so each group's sample is non-empty
Assert.assertTrue(sampleDataLG1.size() > 0);
Assert.assertTrue(sampleDataLG2.size() > 0);
trf.openReader(false);
FileSKVIterator sample = trf.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));
// each group's sample should appear when its families are selected
// (last arg true) and be absent for the other group's families (false)
checkSample(sample, sampleDataLG1, newColFamByteSequence("metaA", "metaB"), true);
checkSample(sample, sampleDataLG1, newColFamByteSequence("metaA"), true);
checkSample(sample, sampleDataLG1, newColFamByteSequence("metaB"), true);
checkSample(sample, sampleDataLG1, newColFamByteSequence("dataA"), false);
checkSample(sample, sampleDataLG2, newColFamByteSequence("metaA", "metaB"), false);
checkSample(sample, sampleDataLG2, newColFamByteSequence("dataA"), true);
// selecting families from both groups should yield the merged, sorted sample
ArrayList<Entry<Key, Value>> allSampleData = new ArrayList<>();
allSampleData.addAll(sampleDataLG1);
allSampleData.addAll(sampleDataLG2);
Collections.sort(allSampleData, new Comparator<Entry<Key, Value>>() {
@Override
public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
return o1.getKey().compareTo(o2.getKey());
}
});
checkSample(sample, allSampleData, newColFamByteSequence("dataA", "metaA"), true);
checkSample(sample, allSampleData, EMPTY_COL_FAMS, false);
trf.closeReader();
}
}
}
Aggregations