Example 11 with FileSKVIterator

use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

the class FileManager method reserveReaders.

private Map<FileSKVIterator, String> reserveReaders(KeyExtent tablet, Collection<String> files, boolean continueOnFailure) throws IOException {
    if (!tablet.isMeta() && files.size() >= maxOpen) {
        throw new IllegalArgumentException("requested files exceeds max open");
    }
    if (files.size() == 0) {
        return Collections.emptyMap();
    }
    List<String> filesToOpen = null;
    List<FileSKVIterator> filesToClose = Collections.emptyList();
    Map<FileSKVIterator, String> readersReserved = new HashMap<>();
    if (!tablet.isMeta()) {
        filePermits.acquireUninterruptibly(files.size());
    }
    // synchronized: atomically reserve already-open files and pick LRU files to close if over the limit
    synchronized (this) {
        filesToOpen = takeOpenFiles(files, readersReserved);
        if (!filesToOpen.isEmpty()) {
            int numOpen = countReaders(openFiles);
            if (filesToOpen.size() + numOpen + reservedReaders.size() > maxOpen) {
                filesToClose = takeLRUOpenFiles((filesToOpen.size() + numOpen + reservedReaders.size()) - maxOpen);
            }
        }
    }
    // close files before opening files to ensure we stay under resource
    // limitations
    closeReaders(filesToClose);
    // open any files that need to be opened
    for (String file : filesToOpen) {
        try {
            if (!file.contains(":"))
                throw new IllegalArgumentException("Expected uri, got : " + file);
            Path path = new Path(file);
            FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
            // log.debug("Opening "+file + " path " + path);
            FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder()
                    .forFile(path.toString(), ns, ns.getConf())
                    .withTableConfiguration(context.getServerConfigurationFactory().getTableConfiguration(tablet.getTableId()))
                    .withBlockCache(dataCache, indexCache).build();
            readersReserved.put(reader, file);
        } catch (Exception e) {
            ProblemReports.getInstance(context).report(new ProblemReport(tablet.getTableId(), ProblemType.FILE_READ, file, e));
            if (continueOnFailure) {
                // release the permit for the file that failed to open
                if (!tablet.isMeta()) {
                    filePermits.release(1);
                }
                log.warn("Failed to open file {} {} continuing...", file, e.getMessage(), e);
            } else {
                // close whatever files were opened
                closeReaders(readersReserved.keySet());
                if (!tablet.isMeta()) {
                    filePermits.release(files.size());
                }
                log.error("Failed to open file {} {}", file, e.getMessage());
                throw new IOException("Failed to open " + file, e);
            }
        }
    }
    synchronized (this) {
        // update set of reserved readers
        reservedReaders.putAll(readersReserved);
    }
    return readersReserved;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) HashMap(java.util.HashMap) IOException(java.io.IOException) SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) ProblemReport(org.apache.accumulo.server.problems.ProblemReport) FileSystem(org.apache.hadoop.fs.FileSystem)
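
The permit accounting is the subtle part of this method: a recoverable failure returns exactly one permit, while a fatal failure closes every reader opened so far and returns all of the permits. The following is a minimal, self-contained sketch of that pattern built on java.util.concurrent.Semaphore; BoundedReaderPool, Reader, and ReaderFactory are illustrative names, not Accumulo API.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Semaphore;

final class BoundedReaderPool {

    interface Reader extends AutoCloseable {
        @Override
        void close() throws IOException;
    }

    interface ReaderFactory {
        Reader open(String file) throws IOException;
    }

    private final Semaphore filePermits;
    private final ReaderFactory factory;

    BoundedReaderPool(int maxOpen, ReaderFactory factory) {
        this.filePermits = new Semaphore(maxOpen);
        this.factory = factory;
    }

    List<Reader> reserve(List<String> files, boolean continueOnFailure) throws IOException {
        // Take one permit per requested file before opening anything.
        filePermits.acquireUninterruptibly(files.size());
        List<Reader> reserved = new ArrayList<>();
        for (String file : files) {
            try {
                reserved.add(factory.open(file));
            } catch (IOException e) {
                if (continueOnFailure) {
                    // Give back only the permit held for the file that failed.
                    filePermits.release(1);
                } else {
                    // Undo everything: close what was opened, return all permits.
                    for (Reader r : reserved) {
                        try { r.close(); } catch (IOException ignored) { }
                    }
                    filePermits.release(files.size());
                    throw e;
                }
            }
        }
        // Callers of this sketch must release one permit per reader when closing it.
        return reserved;
    }
}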

Example 12 with FileSKVIterator

use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

the class FileUtil method estimateSizes.

public static Map<KeyExtent, Long> estimateSizes(AccumuloConfiguration acuConf, Path mapFile, long fileSize, List<KeyExtent> extents, Configuration conf, VolumeManager fs) throws IOException {
    long totalIndexEntries = 0;
    Map<KeyExtent, MLong> counts = new TreeMap<>();
    for (KeyExtent keyExtent : extents) counts.put(keyExtent, new MLong(0));
    Text row = new Text();
    FileSystem ns = fs.getVolumeByPath(mapFile).getFileSystem();
    FileSKVIterator index = FileOperations.getInstance().newIndexReaderBuilder()
            .forFile(mapFile.toString(), ns, ns.getConf())
            .withTableConfiguration(acuConf).build();
    try {
        while (index.hasTop()) {
            Key key = index.getTopKey();
            totalIndexEntries++;
            key.getRow(row);
            for (Entry<KeyExtent, MLong> entry : counts.entrySet())
                if (entry.getKey().contains(row))
                    entry.getValue().l++;
            index.next();
        }
    } finally {
        try {
            if (index != null)
                index.close();
        } catch (IOException e) {
            // continue with next file
            log.error("{}", e.getMessage(), e);
        }
    }
    Map<KeyExtent, Long> results = new TreeMap<>();
    for (KeyExtent keyExtent : extents) {
        double numEntries = counts.get(keyExtent).l;
        if (numEntries == 0)
            numEntries = 1;
        long estSize = (long) ((numEntries / totalIndexEntries) * fileSize);
        results.put(keyExtent, estSize);
    }
    return results;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) FileSystem(org.apache.hadoop.fs.FileSystem) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
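
The estimation itself is purely proportional: each extent is credited with the fraction of index entries whose row it contains, floored at one entry so no extent gets a zero-byte estimate, and that fraction of fileSize becomes its size. A self-contained sketch of just that arithmetic, with illustrative string keys standing in for KeyExtent:

import java.util.HashMap;
import java.util.Map;

final class SizeEstimator {

    // entryCounts maps each extent (string-keyed here for brevity) to the number
    // of index entries whose row fell inside it.
    static Map<String, Long> estimateSizes(Map<String, Long> entryCounts,
            long totalIndexEntries, long fileSize) {
        Map<String, Long> results = new HashMap<>();
        for (Map.Entry<String, Long> e : entryCounts.entrySet()) {
            // Floor at one entry so an extent is never estimated at zero bytes.
            double numEntries = Math.max(e.getValue(), 1L);
            results.put(e.getKey(), (long) ((numEntries / totalIndexEntries) * fileSize));
        }
        return results;
    }

    public static void main(String[] args) {
        Map<String, Long> counts = Map.of("extentA", 30L, "extentB", 0L);
        // extentA: 30/40 of 1000 bytes = 750; extentB floors to 1/40 = 25.
        System.out.println(estimateSizes(counts, 40, 1000));
    }
}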

Example 13 with FileSKVIterator

use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

the class FileUtil method tryToGetFirstAndLastRows.

public static Map<FileRef, FileInfo> tryToGetFirstAndLastRows(VolumeManager fs, AccumuloConfiguration acuConf, Set<FileRef> mapfiles) {
    HashMap<FileRef, FileInfo> mapFilesInfo = new HashMap<>();
    long t1 = System.currentTimeMillis();
    for (FileRef mapfile : mapfiles) {
        FileSKVIterator reader = null;
        FileSystem ns = fs.getVolumeByPath(mapfile.path()).getFileSystem();
        try {
            reader = FileOperations.getInstance().newReaderBuilder()
                    .forFile(mapfile.toString(), ns, ns.getConf())
                    .withTableConfiguration(acuConf).build();
            Key firstKey = reader.getFirstKey();
            if (firstKey != null) {
                mapFilesInfo.put(mapfile, new FileInfo(firstKey, reader.getLastKey()));
            }
        } catch (IOException ioe) {
            log.warn("Failed to read map file to determine first and last key : " + mapfile, ioe);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ioe) {
                    log.warn("failed to close " + mapfile, ioe);
                }
            }
        }
    }
    long t2 = System.currentTimeMillis();
    log.debug(String.format("Found first and last keys for %d map files in %6.2f secs", mapfiles.size(), (t2 - t1) / 1000.0));
    return mapFilesInfo;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileRef(org.apache.accumulo.server.fs.FileRef) HashMap(java.util.HashMap) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
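
The per-file try/finally close-and-log pattern above predates try-with-resources. A sketch of the same loop with an AutoCloseable reader shows how the boilerplate collapses while keeping the behavior of logging a failed file and moving on; Reader and ReaderFactory here are hypothetical interfaces, not the Accumulo ones.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

final class FirstLastScanner {

    interface Reader extends AutoCloseable {
        String firstKey() throws IOException; // null when the file is empty
        String lastKey() throws IOException;
        @Override
        void close() throws IOException;
    }

    interface ReaderFactory {
        Reader open(String file) throws IOException;
    }

    static Map<String, String[]> firstAndLastRows(Iterable<String> files, ReaderFactory factory) {
        Map<String, String[]> info = new HashMap<>();
        for (String file : files) {
            // try-with-resources closes the reader even when firstKey/lastKey throws.
            try (Reader reader = factory.open(file)) {
                String first = reader.firstKey();
                if (first != null)
                    info.put(file, new String[] { first, reader.lastKey() });
            } catch (IOException ioe) {
                // Mirror the original: log the failure and continue with the next file.
                System.err.println("Failed to read " + file + ": " + ioe.getMessage());
            }
        }
        return info;
    }
}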

Example 14 with FileSKVIterator

use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

the class FileUtil method reduceFiles.

public static Collection<String> reduceFiles(AccumuloConfiguration acuConf, Configuration conf, VolumeManager fs, Text prevEndRow, Text endRow, Collection<String> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
    ArrayList<String> paths = new ArrayList<>(mapFiles);
    if (paths.size() <= maxFiles)
        return paths;
    String newDir = String.format("%s/pass_%04d", tmpDir, pass);
    int start = 0;
    ArrayList<String> outFiles = new ArrayList<>();
    int count = 0;
    while (start < paths.size()) {
        int end = Math.min(maxFiles + start, paths.size());
        List<String> inFiles = paths.subList(start, end);
        start = end;
        String newMapFile = String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION);
        outFiles.add(newMapFile);
        FileSystem ns = fs.getVolumeByPath(new Path(newMapFile)).getFileSystem();
        FileSKVWriter writer = new RFileOperations().newWriterBuilder()
                .forFile(newMapFile, ns, ns.getConf())
                .withTableConfiguration(acuConf).build();
        writer.startDefaultLocalityGroup();
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
        FileSKVIterator reader = null;
        try {
            for (String s : inFiles) {
                ns = fs.getVolumeByPath(new Path(s)).getFileSystem();
                reader = FileOperations.getInstance().newIndexReaderBuilder()
                        .forFile(s, ns, ns.getConf())
                        .withTableConfiguration(acuConf).build();
                iters.add(reader);
            }
            MultiIterator mmfi = new MultiIterator(iters, true);
            while (mmfi.hasTop()) {
                Key key = mmfi.getTopKey();
                boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
                boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
                if (gtPrevEndRow && lteEndRow)
                    writer.append(key, new Value(new byte[0]));
                if (!lteEndRow)
                    break;
                mmfi.next();
            }
        } finally {
            try {
                if (reader != null)
                    reader.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
            }
            for (SortedKeyValueIterator<Key, Value> r : iters) {
                try {
                    if (r != null)
                        ((FileSKVIterator) r).close();
                } catch (IOException e) {
                    // continue closing
                    log.error("{}", e.getMessage(), e);
                }
            }
            try {
                writer.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
                throw e;
            }
        }
    }
    return reduceFiles(acuConf, conf, fs, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
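
The shape to notice in reduceFiles is the recursion at the end: each pass merges batches of at most maxFiles inputs into a single output apiece, then recurses over the outputs until few enough files remain. A minimal sketch of that batching skeleton, with the RFile merge abstracted behind a hypothetical Merger interface:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

final class BatchReducer {

    interface Merger {
        // Merges one batch of inputs into a single output; returns the output file name.
        String merge(List<String> inputs, int pass, int batch) throws IOException;
    }

    static List<String> reduce(List<String> files, int maxFiles, Merger merger, int pass)
            throws IOException {
        if (files.size() <= maxFiles)
            return files; // few enough files already; nothing to merge
        List<String> outFiles = new ArrayList<>();
        int start = 0;
        int count = 0;
        while (start < files.size()) {
            int end = Math.min(start + maxFiles, files.size());
            // Each batch of up to maxFiles inputs collapses into one output file.
            outFiles.add(merger.merge(new ArrayList<>(files.subList(start, end)), pass, count++));
            start = end;
        }
        // Recurse: every pass shrinks the file count by roughly a factor of maxFiles.
        return reduce(outFiles, maxFiles, merger, pass + 1);
    }
}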

Example 15 with FileSKVIterator

use of org.apache.accumulo.core.file.FileSKVIterator in project accumulo by apache.

the class FileUtil method countIndexEntries.

private static long countIndexEntries(AccumuloConfiguration acuConf, Text prevEndRow, Text endRow, Collection<String> mapFiles, boolean useIndex, Configuration conf, VolumeManager fs, ArrayList<FileSKVIterator> readers) throws IOException {
    long numKeys = 0;
    // count the total number of index entries
    for (String ref : mapFiles) {
        FileSKVIterator reader = null;
        Path path = new Path(ref);
        FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
        try {
            if (useIndex)
                reader = FileOperations.getInstance().newIndexReaderBuilder()
                        .forFile(path.toString(), ns, ns.getConf())
                        .withTableConfiguration(acuConf).build();
            else
                reader = FileOperations.getInstance().newScanReaderBuilder()
                        .forFile(path.toString(), ns, ns.getConf())
                        .withTableConfiguration(acuConf)
                        .overRange(new Range(prevEndRow, false, null, true), LocalityGroupUtil.EMPTY_CF_SET, false)
                        .build();
            while (reader.hasTop()) {
                Key key = reader.getTopKey();
                if (endRow != null && key.compareRow(endRow) > 0)
                    break;
                else if (prevEndRow == null || key.compareRow(prevEndRow) > 0)
                    numKeys++;
                reader.next();
            }
        } finally {
            try {
                if (reader != null)
                    reader.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
            }
        }
        if (useIndex)
            readers.add(FileOperations.getInstance().newIndexReaderBuilder()
                    .forFile(path.toString(), ns, ns.getConf())
                    .withTableConfiguration(acuConf).build());
        else
            readers.add(FileOperations.getInstance().newScanReaderBuilder()
                    .forFile(path.toString(), ns, ns.getConf())
                    .withTableConfiguration(acuConf)
                    .overRange(new Range(prevEndRow, false, null, true), LocalityGroupUtil.EMPTY_CF_SET, false)
                    .build());
    }
    return numKeys;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
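
Both branches count a key only when its row is strictly greater than prevEndRow and no greater than endRow, the half-open row range (prevEndRow, endRow] that the scan branch also encodes as Range(prevEndRow, false, null, true). A small sketch of that predicate over plain strings (illustrative, not the Accumulo Key API):

final class RowRange {

    // True when row falls in the half-open range (prevEndRow, endRow];
    // a null bound means the range is unbounded on that side.
    static boolean inRange(String row, String prevEndRow, String endRow) {
        boolean afterPrev = prevEndRow == null || row.compareTo(prevEndRow) > 0;
        boolean upToEnd = endRow == null || row.compareTo(endRow) <= 0;
        return afterPrev && upToEnd;
    }

    public static void main(String[] args) {
        System.out.println(inRange("m", "a", "m")); // true: inside (a, m]
        System.out.println(inRange("a", "a", "m")); // false: prevEndRow is exclusive
        System.out.println(inRange("n", "a", "m")); // false: past the inclusive endRow
    }
}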

Aggregations

FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator): 32 uses
Key (org.apache.accumulo.core.data.Key): 22 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 17 uses
ArrayList (java.util.ArrayList): 13 uses
PartialKey (org.apache.accumulo.core.data.PartialKey): 13 uses
Value (org.apache.accumulo.core.data.Value): 13 uses
IOException (java.io.IOException): 11 uses
Configuration (org.apache.hadoop.conf.Configuration): 10 uses
Path (org.apache.hadoop.fs.Path): 9 uses
Range (org.apache.accumulo.core.data.Range): 7 uses
CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration): 7 uses
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 5 uses
ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy): 5 uses
SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator): 5 uses
MultiIterator (org.apache.accumulo.core.iterators.system.MultiIterator): 5 uses
Text (org.apache.hadoop.io.Text): 5 uses
Test (org.junit.Test): 5 uses
File (java.io.File): 4 uses
HashMap (java.util.HashMap): 4 uses
CryptoTest (org.apache.accumulo.core.security.crypto.CryptoTest): 4 uses