
Example 1 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

From the class BulkImporter, method findOverlappingTablets.

public static List<TabletLocation> findOverlappingTablets(ClientContext context, VolumeManager vm, TabletLocator locator, Path file, Text startRow, Text endRow) throws Exception {
    List<TabletLocation> result = new ArrayList<>();
    Collection<ByteSequence> columnFamilies = Collections.emptyList();
    String filename = file.toString();
    // log.debug(filename + " finding overlapping tablets " + startRow + " -> " + endRow);
    FileSystem fs = vm.getVolumeByPath(file).getFileSystem();
    try (FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder()
            .forFile(filename, fs, fs.getConf())
            .withTableConfiguration(context.getConfiguration())
            .seekToBeginning().build()) {
        Text row = startRow;
        if (row == null)
            row = new Text();
        while (true) {
            // log.debug(filename + " Seeking to row " + row);
            reader.seek(new Range(row, null), columnFamilies, false);
            if (!reader.hasTop()) {
                // log.debug(filename + " not found");
                break;
            }
            row = reader.getTopKey().getRow();
            TabletLocation tabletLocation = locator.locateTablet(context, row, false, true);
            // log.debug(filename + " found row " + row + " at location " + tabletLocation);
            result.add(tabletLocation);
            row = tabletLocation.tablet_extent.getEndRow();
            if (row != null && (endRow == null || row.compareTo(endRow) < 0)) {
                // byte0 is a static final byte[] { 0 } field of BulkImporter (not shown in this snippet);
                // appending a single zero byte yields the smallest row that sorts after the end row
                row = new Text(row);
                row.append(byte0, 0, byte0.length);
            } else
                break;
        }
    }
    // log.debug(filename + " to be sent to " + result);
    return result;
}
Also used: FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), TabletLocation (org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), Range (org.apache.accumulo.core.data.Range), ByteSequence (org.apache.accumulo.core.data.ByteSequence)
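
The loop above advances past each tablet by appending a single zero byte to the tablet's end row: in Accumulo's key ordering, row + \x00 is the smallest row that sorts strictly after row. A minimal standalone sketch of that idiom, assuming a byte0 constant equivalent to the static BulkImporter field the snippet references (the class and method names here are hypothetical):

import org.apache.hadoop.io.Text;

public class NextRowSketch {

    // single zero byte, mirroring the byte0 static field assumed by the snippet above
    private static final byte[] byte0 = new byte[] { 0 };

    // returns the smallest row that sorts strictly after the given row
    static Text nextRow(Text row) {
        // copy first so the caller's Text is left untouched
        Text next = new Text(row);
        next.append(byte0, 0, byte0.length);
        return next;
    }

    public static void main(String[] args) {
        Text row = new Text("row1");
        // prints true: "row1" sorts strictly before "row1\x00"
        System.out.println(row.compareTo(nextRow(row)) < 0);
    }
}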

Example 2 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

From the class MultiThreadedRFileTest, method checkIndex.

private static void checkIndex(Reader reader) throws IOException {
    FileSKVIterator indexIter = reader.getIndex();
    if (indexIter.hasTop()) {
        Key lastKey = new Key(indexIter.getTopKey());
        if (reader.getFirstKey().compareTo(lastKey) > 0)
            throw new RuntimeException("First key out of order " + reader.getFirstKey() + " " + lastKey);
        indexIter.next();
        while (indexIter.hasTop()) {
            if (lastKey.compareTo(indexIter.getTopKey()) > 0)
                throw new RuntimeException("Indext out of order " + lastKey + " " + indexIter.getTopKey());
            lastKey = new Key(indexIter.getTopKey());
            indexIter.next();
        }
        if (!reader.getLastKey().equals(lastKey)) {
            throw new RuntimeException("Last key out of order " + reader.getLastKey() + " " + lastKey);
        }
    }
}
Also used: FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), Key (org.apache.accumulo.core.data.Key)
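
The invariant checkIndex verifies is that index entries appear in non-decreasing key order, bounded below by the reader's first key and ending exactly on its last key. A minimal sketch of the same check restated over a plain list of keys (the class and helper names are hypothetical):

import java.util.List;

import org.apache.accumulo.core.data.Key;

public class IndexOrderSketch {

    // restates checkIndex over a plain list: entries must be >= firstKey,
    // non-decreasing, and the final entry must equal lastKey
    static void checkOrder(Key firstKey, Key lastKey, List<Key> indexEntries) {
        if (indexEntries.isEmpty())
            return;
        Key prev = firstKey;
        for (Key k : indexEntries) {
            if (prev.compareTo(k) > 0)
                throw new RuntimeException("Index out of order " + prev + " " + k);
            prev = k;
        }
        if (!lastKey.equals(prev))
            throw new RuntimeException("Last key out of order " + lastKey + " " + prev);
    }
}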

Example 3 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

From the class RFileTest, method test3.

@Test
public void test3() throws IOException {
    // test an rfile with multiple rows having multiple columns
    TestRFile trf = new TestRFile(conf);
    trf.openWriter();
    int val = 0;
    ArrayList<Key> expectedKeys = new ArrayList<>(10000);
    ArrayList<Value> expectedValues = new ArrayList<>(10000);
    for (int row = 0; row < 4; row++) {
        String rowS = formatString("r_", row);
        for (int cf = 0; cf < 4; cf++) {
            String cfS = formatString("cf_", cf);
            for (int cq = 0; cq < 4; cq++) {
                String cqS = formatString("cq_", cq);
                for (int cv = 'A'; cv < 'A' + 4; cv++) {
                    String cvS = "" + (char) cv;
                    for (int ts = 4; ts > 0; ts--) {
                        Key k = newKey(rowS, cfS, cqS, cvS, ts);
                        // the check below ensures that, when all key sizes are the same, more than one index block is created
                        Assert.assertEquals(27, k.getSize());
                        k.setDeleted(true);
                        Value v = newValue("" + val);
                        trf.writer.append(k, v);
                        expectedKeys.add(k);
                        expectedValues.add(v);
                        k = newKey(rowS, cfS, cqS, cvS, ts);
                        Assert.assertEquals(27, k.getSize());
                        v = newValue("" + val);
                        trf.writer.append(k, v);
                        expectedKeys.add(k);
                        expectedValues.add(v);
                        val++;
                    }
                }
            }
        }
    }
    // trf.writer.append(newKey("r1","cf1","cq1","L1", 55), newValue("foo"));
    trf.closeWriter();
    trf.openReader();
    // seek before everything
    trf.iter.seek(new Range((Key) null, null), EMPTY_COL_FAMS, false);
    verify(trf, expectedKeys.iterator(), expectedValues.iterator());
    // seek to the middle
    int index = expectedKeys.size() / 2;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek the first key
    index = 0;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek to the last key
    index = expectedKeys.size() - 1;
    trf.seek(expectedKeys.get(index));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // seek after everything
    index = expectedKeys.size();
    trf.seek(new Key(new Text("z")));
    verify(trf, expectedKeys.subList(index, expectedKeys.size()).iterator(), expectedValues.subList(index, expectedKeys.size()).iterator());
    // test seeking to the current location
    index = expectedKeys.size() / 2;
    trf.seek(expectedKeys.get(index));
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    trf.iter.next();
    index++;
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    trf.seek(expectedKeys.get(index));
    assertTrue(trf.iter.hasTop());
    assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
    assertEquals(expectedValues.get(index), trf.iter.getTopValue());
    // test seeking to each location in the file
    index = 0;
    for (Key key : expectedKeys) {
        trf.seek(key);
        assertTrue(trf.iter.hasTop());
        assertEquals(key, trf.iter.getTopKey());
        assertEquals(expectedValues.get(index), trf.iter.getTopValue());
        if (index > 0) {
            // Key pkey = expectedKeys.get(index - 1);
            // assertEquals(pkey, trf.reader.getPrevKey());
        }
        index++;
    }
    // test seeking backwards to each key
    for (int i = expectedKeys.size() - 1; i >= 0; i--) {
        Key key = expectedKeys.get(i);
        trf.seek(key);
        assertTrue(trf.iter.hasTop());
        assertEquals(key, trf.iter.getTopKey());
        assertEquals(expectedValues.get(i), trf.iter.getTopValue());
        if (i - 1 > 0) {
            // Key pkey = expectedKeys.get(i - 1);
            // assertEquals(pkey, trf.reader.getPrevKey());
        }
    }
    assertEquals(expectedKeys.get(expectedKeys.size() - 1), trf.reader.getLastKey());
    // test seeking to random location and reading all data from that point
    // there was an off by one bug with this in the transient index
    Random rand = new Random();
    for (int i = 0; i < 12; i++) {
        index = rand.nextInt(expectedKeys.size());
        trf.seek(expectedKeys.get(index));
        for (; index < expectedKeys.size(); index++) {
            assertTrue(trf.iter.hasTop());
            assertEquals(expectedKeys.get(index), trf.iter.getTopKey());
            assertEquals(expectedValues.get(index), trf.iter.getTopValue());
            trf.iter.next();
        }
    }
    // count the number of index entries
    FileSKVIterator iiter = trf.reader.getIndex();
    int count = 0;
    while (iiter.hasTop()) {
        count++;
        iiter.next();
    }
    Assert.assertEquals(20, count);
    trf.closeReader();
}
Also used: FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), Random (java.util.Random), ArrayList (java.util.ArrayList), Value (org.apache.accumulo.core.data.Value), Text (org.apache.hadoop.io.Text), Range (org.apache.accumulo.core.data.Range), Key (org.apache.accumulo.core.data.Key), PartialKey (org.apache.accumulo.core.data.PartialKey), CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest), Test (org.junit.Test)

Example 4 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

From the class RFileTest, method testBigKeys.

@Test
public void testBigKeys() throws IOException {
    // this test ensures that big keys do not end up in the index
    ArrayList<Key> keys = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
        String row = String.format("r%06d", i);
        keys.add(new Key(row, "cf1", "cq1", 42));
    }
    // add a few keys with long rows
    for (int i = 0; i < 1000; i += 100) {
        String row = String.format("r%06d", i);
        char[] ca = new char[1000];
        Arrays.fill(ca, 'b');
        row = row + new String(ca);
        keys.add(new Key(row, "cf1", "cq1", 42));
    }
    Collections.sort(keys);
    TestRFile trf = new TestRFile(conf);
    trf.openWriter();
    for (Key k : keys) {
        trf.writer.append(k, new Value((k.hashCode() + "").getBytes()));
    }
    trf.writer.close();
    trf.openReader();
    FileSKVIterator iiter = trf.reader.getIndex();
    while (iiter.hasTop()) {
        Key k = iiter.getTopKey();
        Assert.assertTrue(k + " " + k.getSize() + " >= 20", k.getSize() < 20);
        iiter.next();
    }
    Collections.shuffle(keys);
    for (Key key : keys) {
        trf.reader.seek(new Range(key, null), EMPTY_COL_FAMS, false);
        Assert.assertTrue(trf.reader.hasTop());
        Assert.assertEquals(key, trf.reader.getTopKey());
        Assert.assertEquals(new Value((key.hashCode() + "").getBytes()), trf.reader.getTopValue());
    }
}
Also used: FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), ArrayList (java.util.ArrayList), Value (org.apache.accumulo.core.data.Value), Range (org.apache.accumulo.core.data.Range), Key (org.apache.accumulo.core.data.Key), PartialKey (org.apache.accumulo.core.data.PartialKey), CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest), Test (org.junit.Test)

Example 5 with FileSKVIterator

Use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

From the class RFileTest, method testSampleLG.

@Test
public void testSampleLG() throws IOException {
    int num = 5000;
    for (int sampleBufferSize : new int[] { 1 << 10, 1 << 20 }) {
        // force sample buffer to flush for smaller data
        RFile.setSampleBufferSize(sampleBufferSize);
        for (int modulus : new int[] { 19, 103, 1019 }) {
            List<Entry<Key, Value>> sampleDataLG1 = new ArrayList<>();
            List<Entry<Key, Value>> sampleDataLG2 = new ArrayList<>();
            ConfigurationCopy sampleConf = new ConfigurationCopy(conf == null ? DefaultConfiguration.getInstance() : conf);
            sampleConf.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
            sampleConf.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
            sampleConf.set(Property.TABLE_SAMPLER_OPTS + "modulus", modulus + "");
            Sampler sampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(sampleConf), sampleConf);
            TestRFile trf = new TestRFile(sampleConf);
            trf.openWriter(false, 1000);
            trf.writer.startNewLocalityGroup("meta-lg", newColFamByteSequence("metaA", "metaB"));
            for (int r = 0; r < num; r++) {
                String row = String.format("r%06d", r);
                Key k1 = new Key(row, "metaA", "q9", 7);
                Key k2 = new Key(row, "metaB", "q8", 7);
                Key k3 = new Key(row, "metaB", "qA", 7);
                Value v1 = new Value(("" + r).getBytes());
                Value v2 = new Value(("" + r * 93).getBytes());
                Value v3 = new Value(("" + r * 113).getBytes());
                if (sampler.accept(k1)) {
                    sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k1, v1));
                    sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k2, v2));
                    sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<>(k3, v3));
                }
                trf.writer.append(k1, v1);
                trf.writer.append(k2, v2);
                trf.writer.append(k3, v3);
            }
            trf.writer.startDefaultLocalityGroup();
            for (int r = 0; r < num; r++) {
                String row = String.format("r%06d", r);
                Key k1 = new Key(row, "dataA", "q9", 7);
                Value v1 = new Value(("" + r).getBytes());
                if (sampler.accept(k1)) {
                    sampleDataLG2.add(new AbstractMap.SimpleImmutableEntry<>(k1, v1));
                }
                trf.writer.append(k1, v1);
            }
            trf.closeWriter();
            Assert.assertTrue(sampleDataLG1.size() > 0);
            Assert.assertTrue(sampleDataLG2.size() > 0);
            trf.openReader(false);
            FileSKVIterator sample = trf.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));
            checkSample(sample, sampleDataLG1, newColFamByteSequence("metaA", "metaB"), true);
            checkSample(sample, sampleDataLG1, newColFamByteSequence("metaA"), true);
            checkSample(sample, sampleDataLG1, newColFamByteSequence("metaB"), true);
            checkSample(sample, sampleDataLG1, newColFamByteSequence("dataA"), false);
            checkSample(sample, sampleDataLG2, newColFamByteSequence("metaA", "metaB"), false);
            checkSample(sample, sampleDataLG2, newColFamByteSequence("dataA"), true);
            ArrayList<Entry<Key, Value>> allSampleData = new ArrayList<>();
            allSampleData.addAll(sampleDataLG1);
            allSampleData.addAll(sampleDataLG2);
            Collections.sort(allSampleData, new Comparator<Entry<Key, Value>>() {

                @Override
                public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
                    return o1.getKey().compareTo(o2.getKey());
                }
            });
            checkSample(sample, allSampleData, newColFamByteSequence("dataA", "metaA"), true);
            checkSample(sample, allSampleData, EMPTY_COL_FAMS, false);
            trf.closeReader();
        }
    }
}
Also used: FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy), ArrayList (java.util.ArrayList), RowSampler (org.apache.accumulo.core.client.sample.RowSampler), AbstractMap (java.util.AbstractMap), Entry (java.util.Map.Entry), Sampler (org.apache.accumulo.core.client.sample.Sampler), Value (org.apache.accumulo.core.data.Value), Key (org.apache.accumulo.core.data.Key), PartialKey (org.apache.accumulo.core.data.PartialKey), CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest), Test (org.junit.Test)
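
A RowSampler admits a key into the sample when a hash of its row, taken modulo the configured modulus, selects it, which is why larger moduli yield sparser samples. A rough standalone sketch of that selection rule using Guava's murmur3_32, the hasher configured above (this helper is hypothetical and only approximates RowSampler's internal hashing):

import java.nio.charset.StandardCharsets;

import com.google.common.hash.Hashing;

public class RowSampleSketch {

    // hypothetical approximation of RowSampler: keep a row when its hash is divisible by the modulus
    static boolean accept(String row, int modulus) {
        int hash = Hashing.murmur3_32().hashBytes(row.getBytes(StandardCharsets.UTF_8)).asInt();
        return Math.floorMod(hash, modulus) == 0;
    }

    public static void main(String[] args) {
        int modulus = 19;
        int accepted = 0;
        for (int r = 0; r < 5000; r++) {
            if (accept(String.format("r%06d", r), modulus))
                accepted++;
        }
        // expect roughly 5000 / 19, i.e. around 260 rows, in the sample
        System.out.println(accepted);
    }
}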

Aggregations

FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator): 32
Key (org.apache.accumulo.core.data.Key): 22
FileSystem (org.apache.hadoop.fs.FileSystem): 17
ArrayList (java.util.ArrayList): 13
PartialKey (org.apache.accumulo.core.data.PartialKey): 13
Value (org.apache.accumulo.core.data.Value): 13
IOException (java.io.IOException): 11
Configuration (org.apache.hadoop.conf.Configuration): 10
Path (org.apache.hadoop.fs.Path): 9
Range (org.apache.accumulo.core.data.Range): 7
CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration): 7
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 5
ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy): 5
SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator): 5
MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator): 5
Text (org.apache.hadoop.io.Text): 5
Test (org.junit.Test): 5
File (java.io.File): 4
HashMap (java.util.HashMap): 4
CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest): 4