Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
Class SplitLarge, method main.
/**
 * Splits each RFile named on the command line into two sibling files. Entries whose combined key
 * and value size is below {@code opts.maxSize} are appended to {@code <name>_small.rf}; all other
 * entries are appended to {@code <name>_large.rf}.
 *
 * @param args
 *          command-line arguments, parsed by {@code Opts}
 * @throws IllegalArgumentException
 *           if a file name does not end with {@code .rf}
 * @throws Exception
 *           if parsing the arguments or opening, reading or writing a file fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = CachedConfiguration.getInstance();
  FileSystem fs = FileSystem.get(conf);
  Opts opts = new Opts();
  opts.parseArgs(SplitLarge.class.getName(), args);

  for (String file : opts.files) {
    // Validate the name before anything is opened, so a bad argument fails fast instead of
    // first opening the input file and its readers.
    if (!file.endsWith(".rf")) {
      throw new IllegalArgumentException("File must end with .rf");
    }

    // Strip the 3-character ".rf" suffix once; both output names share this prefix.
    String baseName = file.substring(0, file.length() - 3);
    String smallName = baseName + "_small.rf";
    String largeName = baseName + "_large.rf";

    AccumuloConfiguration aconf = DefaultConfiguration.getInstance();
    Path path = new Path(file);
    CachableBlockFile.Reader rdr = new CachableBlockFile.Reader(fs, path, conf, null, null, aconf);
    try (Reader iter = new RFile.Reader(rdr)) {
      int blockSize = (int) aconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE);
      try (Writer small = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(smallName), "gz", null, conf, aconf), blockSize);
          Writer large = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(largeName), "gz", null, conf, aconf), blockSize)) {
        small.startDefaultLocalityGroup();
        large.startDefaultLocalityGroup();

        // Unbounded range with no column-family filter.
        iter.seek(new Range(), new ArrayList<>(), false);
        while (iter.hasTop()) {
          Key key = iter.getTopKey();
          Value value = iter.getTopValue();
          if (key.getSize() + value.getSize() < opts.maxSize) {
            small.append(key, value);
          } else {
            large.append(key, value);
          }
          iter.next();
        }
      }
    }
  }
}
Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
Class RFileTest, method runVersionTest.
/**
 * Loads the classpath resource {@code org/apache/accumulo/core/file/rfile/ver_<version>.rf} fully
 * into memory, opens it as an RFile and checks its contents.
 *
 * <p>
 * For each start value in {0, 10, 100, 998} the file is first scanned once per column family
 * index 1..4 (inclusive seek on that single family), then once across all families, and every
 * returned key/value is compared with the expected one built by {@code newKey}/{@code newValue}.
 *
 * @param version
 *          version number used to select the test resource
 * @throws IOException
 *           if the resource is missing or reading it fails
 */
private void runVersionTest(int version) throws IOException {
  String resource = "org/apache/accumulo/core/file/rfile/ver_" + version + ".rf";
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  // try-with-resources so the resource stream is closed even if reading fails.
  try (InputStream in = this.getClass().getClassLoader().getResourceAsStream(resource)) {
    if (in == null) {
      // getResourceAsStream returns null for a missing resource; fail with a clear message.
      throw new IOException("Test resource not found: " + resource);
    }
    byte[] buf = new byte[1024];
    int read;
    while ((read = in.read(buf)) > 0) {
      baos.write(buf, 0, read);
    }
  }

  byte[] data = baos.toByteArray();
  SeekableByteArrayInputStream bais = new SeekableByteArrayInputStream(data);
  FSDataInputStream in2 = new FSDataInputStream(bais);
  AccumuloConfiguration aconf = DefaultConfiguration.getInstance();
  CachableBlockFile.Reader _cbr = new CachableBlockFile.Reader(in2, data.length, CachedConfiguration.getInstance(), aconf);

  // try-with-resources so the reader is closed even when an assertion fails part-way through.
  try (Reader reader = new RFile.Reader(_cbr)) {
    checkIndex(reader);
    ColumnFamilySkippingIterator iter = new ColumnFamilySkippingIterator(reader);

    for (int start : new int[] {0, 10, 100, 998}) {
      // Pass 1: one column family at a time.
      for (int cf = 1; cf <= 4; cf++) {
        if (start == 0) {
          iter.seek(new Range(), newColFamByteSequence(formatString("cf_", cf)), true);
        } else {
          iter.seek(new Range(formatString("r_", start), null), newColFamByteSequence(formatString("cf_", cf)), true);
        }

        for (int i = start; i < 1000; i++) {
          assertTrue(iter.hasTop());
          assertEquals(newKey(formatString("r_", i), formatString("cf_", cf), formatString("cq_", 0), "", 1000 - i), iter.getTopKey());
          assertEquals(newValue(i + ""), iter.getTopValue());
          iter.next();
        }
        assertFalse(iter.hasTop());
      }

      // Pass 2: no column-family restriction; families 1..4 are expected in order for each i.
      if (start == 0) {
        iter.seek(new Range(), newColFamByteSequence(), false);
      } else {
        iter.seek(new Range(formatString("r_", start), null), newColFamByteSequence(), false);
      }

      for (int i = start; i < 1000; i++) {
        for (int cf = 1; cf <= 4; cf++) {
          assertTrue(iter.hasTop());
          assertEquals(newKey(formatString("r_", i), formatString("cf_", cf), formatString("cq_", 0), "", 1000 - i), iter.getTopKey());
          assertEquals(newValue(i + ""), iter.getTopValue());
          iter.next();
        }
      }
      assertFalse(iter.hasTop());
    }
  }
}
Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
Class RFileTest, method testWriterTableProperties.
/**
 * Writes test data with both compressed-block-size table properties set to "1K", then checks
 * that the file's index holds more than 10 entries and that a scanner reads back exactly the
 * data that was written.
 */
@Test
public void testWriterTableProperties() throws Exception {
  LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
  String testFile = createTmpTestFile();

  Map<String, String> tableProps = new HashMap<>();
  tableProps.put(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE.getKey(), "1K");
  tableProps.put(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX.getKey(), "1K");

  RFileWriter rfWriter = RFile.newWriter().to(testFile).withFileSystem(localFs).withTableProperties(tableProps).build();
  SortedMap<Key, Value> expected = createTestData(10, 10, 10);
  rfWriter.append(expected.entrySet());
  rfWriter.close();

  Reader rfReader = getReader(localFs, testFile);
  FileSKVIterator indexIter = rfReader.getIndex();
  int indexEntries = 0;
  for (; indexIter.hasTop(); indexIter.next()) {
    indexEntries++;
  }

  // if settings are used then should create multiple index entries
  Assert.assertTrue(indexEntries > 10);
  rfReader.close();

  Scanner rfScanner = RFile.newScanner().from(testFile).withFileSystem(localFs).build();
  Assert.assertEquals(expected, toMap(rfScanner));
  rfScanner.close();
}
Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
Class RFileTest, method testLocalityGroups.
/**
 * Writes three batches of test data into locality groups "z" (families colStr(0), colStr(1)),
 * "h" (family colStr(2)) and the default group, then checks that fetching each group's column
 * families returns exactly that batch, that an unrestricted scan returns everything, and that
 * the reader reports the configured locality groups.
 */
@Test
public void testLocalityGroups() throws Exception {
  SortedMap<Key, Value> zData = createTestData(0, 10, 0, 2, 10);
  SortedMap<Key, Value> hData = createTestData(0, 10, 2, 1, 10);
  SortedMap<Key, Value> restData = createTestData(0, 10, 3, 7, 10);

  LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
  String testFile = createTmpTestFile();

  RFileWriter rfWriter = RFile.newWriter().to(testFile).withFileSystem(localFs).build();
  rfWriter.startNewLocalityGroup("z", colStr(0), colStr(1));
  rfWriter.append(zData.entrySet());
  rfWriter.startNewLocalityGroup("h", colStr(2));
  rfWriter.append(hData.entrySet());
  rfWriter.startDefaultLocalityGroup();
  rfWriter.append(restData.entrySet());
  rfWriter.close();

  Scanner rfScanner = RFile.newScanner().from(testFile).withFileSystem(localFs).build();

  // Group "z": families 0 and 1.
  rfScanner.fetchColumnFamily(new Text(colStr(0)));
  rfScanner.fetchColumnFamily(new Text(colStr(1)));
  Assert.assertEquals(zData, toMap(rfScanner));

  // Group "h": family 2.
  rfScanner.clearColumns();
  rfScanner.fetchColumnFamily(new Text(colStr(2)));
  Assert.assertEquals(hData, toMap(rfScanner));

  // Default group: families 3 through 9.
  rfScanner.clearColumns();
  int family = 3;
  while (family < 10) {
    rfScanner.fetchColumnFamily(new Text(colStr(family)));
    family++;
  }
  Assert.assertEquals(restData, toMap(rfScanner));

  // No family restriction: everything that was written.
  rfScanner.clearColumns();
  Assert.assertEquals(createTestData(10, 10, 10), toMap(rfScanner));
  rfScanner.close();

  Reader rfReader = getReader(localFs, testFile);
  Map<String, ArrayList<ByteSequence>> groups = rfReader.getLocalityGroupCF();

  Assert.assertTrue(groups.containsKey("z"));
  ArrayList<ByteSequence> zFamilies = groups.get("z");
  Assert.assertTrue(zFamilies.size() == 2);
  Assert.assertTrue(zFamilies.contains(new ArrayByteSequence(colStr(0))));
  Assert.assertTrue(zFamilies.contains(new ArrayByteSequence(colStr(1))));

  Assert.assertTrue(groups.containsKey("h"));
  Assert.assertEquals(Arrays.asList(new ArrayByteSequence(colStr(2))), groups.get("h"));
  rfReader.close();
}
Use of org.apache.accumulo.core.file.rfile.RFile.Reader in the Apache Accumulo project.
Class RFileScanner, method iterator.
/**
 * Builds an iterator over the entries of all configured RFile sources.
 *
 * <p>
 * One {@code RFile.Reader} is opened per source; each is swapped for its sample when a sampler
 * configuration is set. The readers are merged with a {@code MultiIterator}, optionally wrapped
 * in the system scan iterators, then in the configured iterators, and finally sought to the
 * scanner's range (or {@code EMPTY_RANGE} when none is set).
 *
 * @return an iterator adapter over the fully configured iterator stack
 * @throws RuntimeException
 *           wrapping any {@link IOException} raised while building or seeking the stack
 */
@Override
public Iterator<Entry<Key, Value>> iterator() {
  try {
    RFileSource[] sources = opts.in.getSources();
    List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(sources.length);
    for (int i = 0; i < sources.length; i++) {
      // TODO may have been a bug with multiple files and caching in older version...
      RFileSource source = sources[i];
      FSDataInputStream inputStream = (FSDataInputStream) source.getInputStream();
      CachableBlockFile.Reader blockReader = new CachableBlockFile.Reader("source-" + i, inputStream, source.getLength(), opts.in.getConf(), dataCache, indexCache,
          DefaultConfiguration.getInstance());
      readers.add(new RFile.Reader(blockReader));
    }

    if (getSamplerConfiguration() != null) {
      // Replace every reader in place with its sample view.
      for (int i = 0; i < readers.size(); i++) {
        Reader fullReader = (Reader) readers.get(i);
        readers.set(i, fullReader.getSample(new SamplerConfigurationImpl(getSamplerConfiguration())));
      }
    }

    SortedKeyValueIterator<Key, Value> iterator = (opts.bounds == null) ? new MultiIterator(readers, false) : new MultiIterator(readers, opts.bounds);

    Set<ByteSequence> families = Collections.emptySet();
    if (opts.useSystemIterators) {
      SortedSet<Column> cols = this.getFetchedColumns();
      families = LocalityGroupUtil.families(cols);
      iterator = IteratorUtil.setupSystemScanIterators(iterator, cols, getAuthorizations(), EMPTY_BYTES);
    }

    // An IOException from loading iterators is wrapped by the single handler below.
    boolean hasTableConfig = opts.tableConfig != null && opts.tableConfig.size() > 0;
    if (hasTableConfig) {
      ConfigurationCopy conf = new ConfigurationCopy(opts.tableConfig);
      iterator = IteratorUtil.loadIterators(IteratorScope.scan, iterator, null, conf, serverSideIteratorList, serverSideIteratorOptions, new IterEnv());
    } else {
      iterator = IteratorUtil.loadIterators(iterator, serverSideIteratorList, serverSideIteratorOptions, new IterEnv(), false, null);
    }

    // The seek is inclusive of the families only when there are families to include.
    iterator.seek(getRange() == null ? EMPTY_RANGE : getRange(), families, !families.isEmpty());
    return new IteratorAdapter(iterator);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Aggregations