Use of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project.
From the class FileManager, the method reserveReaders:
/**
 * Reserves readers for {@code files} on behalf of {@code tablet}. Readers already cached in
 * {@code openFiles} are reused; the rest are opened here. All reserved readers are recorded in
 * {@code reservedReaders} before returning, so the caller is responsible for releasing them
 * (presumably via a matching release method — not visible in this chunk).
 *
 * @param tablet extent the files belong to; metadata tablets bypass both the open-file limit
 *        check and the permit semaphore so metadata can always be read
 * @param files file URIs to reserve readers for
 * @param continueOnFailure when true, a file that fails to open is logged, reported, and
 *        skipped; when false, everything opened so far is closed and an IOException is thrown
 * @return map from each reserved reader to the file it reads
 * @throws IOException if a file fails to open and {@code continueOnFailure} is false
 */
private Map<FileSKVIterator, String> reserveReaders(KeyExtent tablet, Collection<String> files, boolean continueOnFailure) throws IOException {
  // A non-metadata tablet may never request more files than the global limit allows.
  if (!tablet.isMeta() && files.size() >= maxOpen) {
    throw new IllegalArgumentException("requested files exceeds max open");
  }
  if (files.size() == 0) {
    return Collections.emptyMap();
  }
  List<String> filesToOpen = null;
  List<FileSKVIterator> filesToClose = Collections.emptyList();
  Map<FileSKVIterator, String> readersReserved = new HashMap<>();
  // Acquire one permit per requested file. filePermits appears to be a Semaphore bounding the
  // total number of files held open across tablets — TODO confirm against the field declaration.
  if (!tablet.isMeta()) {
    filePermits.acquireUninterruptibly(files.size());
  }
  // a synch block: under the lock, decide which files must actually be opened and which
  // least-recently-used cached readers must be evicted to stay under maxOpen
  synchronized (this) {
    filesToOpen = takeOpenFiles(files, readersReserved);
    if (!filesToOpen.isEmpty()) {
      int numOpen = countReaders(openFiles);
      if (filesToOpen.size() + numOpen + reservedReaders.size() > maxOpen) {
        filesToClose = takeLRUOpenFiles((filesToOpen.size() + numOpen + reservedReaders.size()) - maxOpen);
      }
    }
  }
  // close files before opening files to ensure we stay under resource
  // limitations
  closeReaders(filesToClose);
  // open any files that need to be opened
  for (String file : filesToOpen) {
    try {
      if (!file.contains(":"))
        throw new IllegalArgumentException("Expected uri, got : " + file);
      Path path = new Path(file);
      FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
      // log.debug("Opening "+file + " path " + path);
      FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(path.toString(), ns, ns.getConf()).withTableConfiguration(context.getServerConfigurationFactory().getTableConfiguration(tablet.getTableId())).withBlockCache(dataCache, indexCache).build();
      readersReserved.put(reader, file);
    } catch (Exception e) {
      ProblemReports.getInstance(context).report(new ProblemReport(tablet.getTableId(), ProblemType.FILE_READ, file, e));
      if (continueOnFailure) {
        // release the permit for the file that failed to open
        if (!tablet.isMeta()) {
          filePermits.release(1);
        }
        log.warn("Failed to open file {} {} continuing...", file, e.getMessage(), e);
      } else {
        // close whatever files were opened
        closeReaders(readersReserved.keySet());
        // Release every permit acquired above: none of these readers were recorded in
        // reservedReaders yet, so nothing else will release them.
        if (!tablet.isMeta()) {
          filePermits.release(files.size());
        }
        log.error("Failed to open file {} {}", file, e.getMessage());
        throw new IOException("Failed to open " + file, e);
      }
    }
  }
  synchronized (this) {
    // update set of reserved readers
    reservedReaders.putAll(readersReserved);
  }
  return readersReserved;
}
Use of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project.
From the class FileUtil, the method estimateSizes:
/**
 * Estimates, for each extent, how much of {@code mapFile} falls inside it. The file's index is
 * scanned once; each index entry's row is tested against every extent, and the file size is
 * apportioned by the fraction of index entries each extent contains.
 *
 * @param acuConf table configuration used to open the index reader
 * @param mapFile file whose size is being apportioned
 * @param fileSize total size of the file, in bytes
 * @param extents extents to estimate; every extent appears in the returned map
 * @param conf Hadoop configuration (unused here, kept for interface compatibility)
 * @param fs volume manager used to resolve the file's filesystem
 * @return map from extent to estimated bytes of the file within that extent
 * @throws IOException if the index reader cannot be opened or read
 */
public static Map<KeyExtent, Long> estimateSizes(AccumuloConfiguration acuConf, Path mapFile, long fileSize, List<KeyExtent> extents, Configuration conf, VolumeManager fs) throws IOException {
  long totalIndexEntries = 0;
  Map<KeyExtent, MLong> counts = new TreeMap<>();
  for (KeyExtent keyExtent : extents) {
    counts.put(keyExtent, new MLong(0));
  }
  Text row = new Text();
  FileSystem ns = fs.getVolumeByPath(mapFile).getFileSystem();
  FileSKVIterator index = FileOperations.getInstance().newIndexReaderBuilder().forFile(mapFile.toString(), ns, ns.getConf()).withTableConfiguration(acuConf).build();
  try {
    while (index.hasTop()) {
      Key key = index.getTopKey();
      totalIndexEntries++;
      key.getRow(row);
      for (Entry<KeyExtent, MLong> entry : counts.entrySet()) {
        if (entry.getKey().contains(row))
          entry.getValue().l++;
      }
      index.next();
    }
  } finally {
    try {
      // index is always non-null here; build() either returned or threw
      index.close();
    } catch (IOException e) {
      // continue with next file
      log.error("{}", e.getMessage(), e);
    }
  }
  Map<KeyExtent, Long> results = new TreeMap<>();
  for (KeyExtent keyExtent : extents) {
    double numEntries = counts.get(keyExtent).l;
    if (numEntries == 0)
      numEntries = 1;
    // Guard against an empty index: previously numEntries/totalIndexEntries was 1/0 = Infinity,
    // and casting Infinity to long produced Long.MAX_VALUE for every extent. With no index
    // information, attribute the whole file size to each extent instead.
    long estSize = totalIndexEntries == 0 ? fileSize : (long) ((numEntries / totalIndexEntries) * fileSize);
    results.put(keyExtent, estSize);
  }
  return results;
}
Use of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project.
From the class FileUtil, the method tryToGetFirstAndLastRows:
/**
 * Best-effort lookup of the first and last keys of each given map file. Files that cannot be
 * read are logged and simply omitted from the result, so the returned map may be smaller than
 * {@code mapfiles}. Files whose first key is null (empty files) are also omitted.
 *
 * @param fs volume manager used to resolve each file's filesystem
 * @param acuConf table configuration used to open readers
 * @param mapfiles files to inspect
 * @return map from file to its first/last key info, for every readable, non-empty file
 */
public static Map<FileRef, FileInfo> tryToGetFirstAndLastRows(VolumeManager fs, AccumuloConfiguration acuConf, Set<FileRef> mapfiles) {
  HashMap<FileRef, FileInfo> result = new HashMap<>();
  long startMillis = System.currentTimeMillis();
  for (FileRef ref : mapfiles) {
    FileSystem volumeFs = fs.getVolumeByPath(ref.path()).getFileSystem();
    FileSKVIterator skvReader = null;
    try {
      skvReader = FileOperations.getInstance().newReaderBuilder().forFile(ref.toString(), volumeFs, volumeFs.getConf()).withTableConfiguration(acuConf).build();
      Key first = skvReader.getFirstKey();
      // An empty file has no first key; leave it out of the result entirely.
      if (first != null) {
        result.put(ref, new FileInfo(first, skvReader.getLastKey()));
      }
    } catch (IOException ioe) {
      log.warn("Failed to read map file to determine first and last key : " + ref, ioe);
    } finally {
      if (skvReader != null) {
        try {
          skvReader.close();
        } catch (IOException ioe) {
          log.warn("failed to close " + ref, ioe);
        }
      }
    }
  }
  long endMillis = System.currentTimeMillis();
  log.debug(String.format("Found first and last keys for %d map files in %6.2f secs", mapfiles.size(), (endMillis - startMillis) / 1000.0));
  return result;
}
Use of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project.
From the class FileUtil, the method reduceFiles:
/**
 * Recursively merges {@code mapFiles} down to at most {@code maxFiles} index files. Input files
 * are processed in batches of {@code maxFiles}; each batch's index entries within
 * ({@code prevEndRow}, {@code endRow}] are merged into one new RFile under a per-pass temp
 * directory. Recurses with the merged output until the count fits.
 *
 * @param acuConf table configuration used to open readers and writers
 * @param conf Hadoop configuration (passed through to the recursive call)
 * @param fs volume manager used to resolve filesystems
 * @param prevEndRow exclusive lower row bound, or null for no lower bound
 * @param endRow inclusive upper row bound, or null for no upper bound
 * @param mapFiles files to reduce
 * @param maxFiles target maximum number of files
 * @param tmpDir directory for intermediate pass output
 * @param pass current recursion depth, used to name the pass directory
 * @return a collection of at most {@code maxFiles} file paths
 * @throws IOException if reading an input or writing/closing an output fails
 */
public static Collection<String> reduceFiles(AccumuloConfiguration acuConf, Configuration conf, VolumeManager fs, Text prevEndRow, Text endRow, Collection<String> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
  ArrayList<String> paths = new ArrayList<>(mapFiles);
  if (paths.size() <= maxFiles)
    return paths;
  String newDir = String.format("%s/pass_%04d", tmpDir, pass);
  int start = 0;
  ArrayList<String> outFiles = new ArrayList<>();
  int count = 0;
  while (start < paths.size()) {
    int end = Math.min(maxFiles + start, paths.size());
    List<String> inFiles = paths.subList(start, end);
    start = end;
    String newMapFile = String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION);
    outFiles.add(newMapFile);
    FileSystem ns = fs.getVolumeByPath(new Path(newMapFile)).getFileSystem();
    FileSKVWriter writer = new RFileOperations().newWriterBuilder().forFile(newMapFile, ns, ns.getConf()).withTableConfiguration(acuConf).build();
    writer.startDefaultLocalityGroup();
    List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
    try {
      for (String s : inFiles) {
        ns = fs.getVolumeByPath(new Path(s)).getFileSystem();
        // Add each reader to iters immediately so the finally block closes it even if a
        // later open fails.
        iters.add(FileOperations.getInstance().newIndexReaderBuilder().forFile(s, ns, ns.getConf()).withTableConfiguration(acuConf).build());
      }
      MultiIterator mmfi = new MultiIterator(iters, true);
      while (mmfi.hasTop()) {
        Key key = mmfi.getTopKey();
        boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
        boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
        if (gtPrevEndRow && lteEndRow)
          writer.append(key, new Value(new byte[0]));
        // Index keys are sorted, so once past endRow nothing further can qualify.
        if (!lteEndRow)
          break;
        mmfi.next();
      }
    } finally {
      // Close every reader exactly once via iters. The previous version also tracked the
      // most-recently-opened reader in a separate local and closed it again here, which
      // double-closed the last reader of every batch.
      for (SortedKeyValueIterator<Key, Value> r : iters) {
        try {
          ((FileSKVIterator) r).close();
        } catch (IOException e) {
          // continue closing
          log.error("{}", e.getMessage(), e);
        }
      }
      try {
        writer.close();
      } catch (IOException e) {
        log.error("{}", e.getMessage(), e);
        throw e;
      }
    }
  }
  return reduceFiles(acuConf, conf, fs, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Use of org.apache.accumulo.core.file.FileSKVIterator in the Apache Accumulo project.
From the class FileUtil, the method countIndexEntries:
/**
 * Counts the entries of each map file whose row falls within ({@code prevEndRow},
 * {@code endRow}], using either the file index or a range-limited scan reader. After counting,
 * a fresh reader for each file is opened and appended to {@code readers}; those readers are
 * owned by the caller, who must close them.
 *
 * @param acuConf table configuration used to open readers
 * @param prevEndRow exclusive lower row bound, or null for no lower bound
 * @param endRow inclusive upper row bound, or null for no upper bound
 * @param mapFiles files to count entries in
 * @param useIndex when true count index entries, otherwise count scanned data entries
 * @param conf Hadoop configuration (unused here, kept for interface compatibility)
 * @param fs volume manager used to resolve filesystems
 * @param readers out-parameter: one freshly opened, caller-owned reader is added per file
 * @return total number of in-range entries across all files
 * @throws IOException if a reader cannot be opened or read
 */
private static long countIndexEntries(AccumuloConfiguration acuConf, Text prevEndRow, Text endRow, Collection<String> mapFiles, boolean useIndex, Configuration conf, VolumeManager fs, ArrayList<FileSKVIterator> readers) throws IOException {
  long numKeys = 0;
  // count the total number of index entries
  for (String ref : mapFiles) {
    Path path = new Path(ref);
    FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
    FileSKVIterator countingReader = null;
    try {
      if (useIndex) {
        countingReader = FileOperations.getInstance().newIndexReaderBuilder().forFile(path.toString(), ns, ns.getConf()).withTableConfiguration(acuConf).build();
      } else {
        countingReader = FileOperations.getInstance().newScanReaderBuilder().forFile(path.toString(), ns, ns.getConf()).withTableConfiguration(acuConf).overRange(new Range(prevEndRow, false, null, true), LocalityGroupUtil.EMPTY_CF_SET, false).build();
      }
      while (countingReader.hasTop()) {
        Key topKey = countingReader.getTopKey();
        // Keys are sorted: once past endRow, stop scanning this file.
        if (endRow != null && topKey.compareRow(endRow) > 0) {
          break;
        }
        if (prevEndRow == null || topKey.compareRow(prevEndRow) > 0) {
          numKeys++;
        }
        countingReader.next();
      }
    } finally {
      try {
        if (countingReader != null) {
          countingReader.close();
        }
      } catch (IOException e) {
        log.error("{}", e.getMessage(), e);
      }
    }
    // The counting pass exhausted the reader, so open a fresh one for the caller.
    FileSKVIterator callerReader;
    if (useIndex) {
      callerReader = FileOperations.getInstance().newIndexReaderBuilder().forFile(path.toString(), ns, ns.getConf()).withTableConfiguration(acuConf).build();
    } else {
      callerReader = FileOperations.getInstance().newScanReaderBuilder().forFile(path.toString(), ns, ns.getConf()).withTableConfiguration(acuConf).overRange(new Range(prevEndRow, false, null, true), LocalityGroupUtil.EMPTY_CF_SET, false).build();
    }
    readers.add(callerReader);
  }
  return numKeys;
}
Aggregations