Search in sources :

Example 11 with TabletFile

use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

the class FileUtil method countIndexEntries.

/**
 * Counts the keys whose row lies after {@code prevEndRow} and at or before {@code endRow} across
 * the given files, and leaves one freshly opened reader per file in {@code readers}.
 *
 * <p>
 * Each file is opened twice: once with a reader that is consumed for counting and closed here,
 * and once more with an unread reader that is appended to {@code readers}. This method never
 * closes the readers it appends.
 *
 * @param context server context supplying the configuration, volume manager and crypto service
 * @param prevEndRow exclusive lower row bound, or {@code null} for no lower bound
 * @param endRow inclusive upper row bound, or {@code null} for no upper bound
 * @param mapFiles files to examine
 * @param useIndex {@code true} to open index readers, {@code false} to open scan readers over the
 *        range starting after {@code prevEndRow}
 * @param readers output list receiving one open reader per file
 * @return the number of keys counted over all files
 * @throws IOException if a reader cannot be opened or advanced
 */
private static long countIndexEntries(ServerContext context, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, boolean useIndex, ArrayList<FileSKVIterator> readers) throws IOException {
    final AccumuloConfiguration tableConf = context.getConfiguration();
    long total = 0;
    for (TabletFile mapFile : mapFiles) {
        FileSKVIterator counter = null;
        final FileSystem fs = context.getVolumeManager().getFileSystemByPath(mapFile.getPath());
        try {
            counter = openEntryReader(context, mapFile, fs, tableConf, prevEndRow, useIndex);
            for (; counter.hasTop(); counter.next()) {
                final Key top = counter.getTopKey();
                // stop at the first key beyond the upper bound
                final boolean pastEnd = endRow != null && top.compareRow(endRow) > 0;
                if (pastEnd) {
                    break;
                }
                final boolean afterPrev = prevEndRow == null || top.compareRow(prevEndRow) > 0;
                if (afterPrev) {
                    total++;
                }
            }
        } finally {
            if (counter != null) {
                try {
                    counter.close();
                } catch (IOException e) {
                    log.error("{}", e.getMessage(), e);
                }
            }
        }
        // the counting reader has been consumed and closed; hand back a fresh one
        final FileSKVIterator fresh = openEntryReader(context, mapFile, fs, tableConf, prevEndRow, useIndex);
        readers.add(fresh);
    }
    return total;
}

/**
 * Opens either an index reader or a scan reader (ranged to start after {@code prevEndRow}) for
 * one file.
 */
private static FileSKVIterator openEntryReader(ServerContext context, TabletFile file, FileSystem fs, AccumuloConfiguration tableConf, Text prevEndRow, boolean useIndex) throws IOException {
    if (useIndex) {
        return FileOperations.getInstance().newIndexReaderBuilder().forFile(file.getPathStr(), fs, fs.getConf(), context.getCryptoService()).withTableConfiguration(tableConf).build();
    }
    return FileOperations.getInstance().newScanReaderBuilder().forFile(file.getPathStr(), fs, fs.getConf(), context.getCryptoService()).withTableConfiguration(tableConf).overRange(new Range(prevEndRow, false, null, true), Set.of(), false).build();
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileSystem(org.apache.hadoop.fs.FileSystem) TabletFile(org.apache.accumulo.core.metadata.TabletFile) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Example 12 with TabletFile

use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

the class FileUtil method tryToGetFirstAndLastRows.

/**
 * Best-effort lookup of the first and last key of each file.
 *
 * <p>
 * A file that cannot be read is logged at warn level and left out of the result, as is a file
 * whose first key is {@code null}. Failures while closing a reader are logged and otherwise
 * ignored.
 *
 * @param context server context supplying the configuration, volume manager and crypto service
 * @param mapfiles files to inspect
 * @return a map from each successfully read, non-empty file to its first/last key info
 */
public static Map<TabletFile, FileInfo> tryToGetFirstAndLastRows(ServerContext context, Set<TabletFile> mapfiles) {
    final HashMap<TabletFile, FileInfo> infoByFile = new HashMap<>();
    final long startMillis = System.currentTimeMillis();
    for (TabletFile candidate : mapfiles) {
        FileSKVIterator rfile = null;
        final FileSystem fs = context.getVolumeManager().getFileSystemByPath(candidate.getPath());
        try {
            rfile = FileOperations.getInstance().newReaderBuilder().forFile(candidate.getPathStr(), fs, fs.getConf(), context.getCryptoService()).withTableConfiguration(context.getConfiguration()).build();
            final Key first = rfile.getFirstKey();
            if (first == null) {
                // nothing to record for this file; the finally block still closes the reader
                continue;
            }
            infoByFile.put(candidate, new FileInfo(first, rfile.getLastKey()));
        } catch (IOException readFailure) {
            log.warn("Failed to read map file to determine first and last key : " + candidate, readFailure);
        } finally {
            try {
                if (rfile != null) {
                    rfile.close();
                }
            } catch (IOException closeFailure) {
                log.warn("failed to close " + candidate, closeFailure);
            }
        }
    }
    final long endMillis = System.currentTimeMillis();
    final double elapsedSecs = (endMillis - startMillis) / 1000.0;
    log.debug(String.format("Found first and last keys for %d map files in %6.2f secs", mapfiles.size(), elapsedSecs));
    return infoByFile;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) HashMap(java.util.HashMap) FileSystem(org.apache.hadoop.fs.FileSystem) TabletFile(org.apache.accumulo.core.metadata.TabletFile) IOException(java.io.IOException) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 13 with TabletFile

use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

the class FileUtil method findLastKey.

/**
 * Finds the greatest last key over all of the given files.
 *
 * <p>
 * Each file is opened with a reader seeked to the beginning; files with no top key are skipped.
 * Errors while closing a reader are logged and do not abort the search.
 *
 * @param context server context supplying the configuration, volume manager and crypto service
 * @param mapFiles files to inspect
 * @return the largest last key found, or {@code null} if no file contributed a key
 * @throws IOException if a file cannot be opened or read
 */
public static WritableComparable<Key> findLastKey(ServerContext context, Collection<TabletFile> mapFiles) throws IOException {
    Key greatest = null;
    for (TabletFile mapFile : mapFiles) {
        final FileSystem fs = context.getVolumeManager().getFileSystemByPath(mapFile.getPath());
        final FileSKVIterator rfile = FileOperations.getInstance().newReaderBuilder().forFile(mapFile.getPathStr(), fs, fs.getConf(), context.getCryptoService()).withTableConfiguration(context.getConfiguration()).seekToBeginning().build();
        try {
            // an empty file has no last key and contributes nothing
            if (rfile.hasTop()) {
                final Key candidate = rfile.getLastKey();
                final boolean replaces = greatest == null || candidate.compareTo(greatest) > 0;
                if (replaces) {
                    greatest = candidate;
                }
            }
        } finally {
            if (rfile != null) {
                try {
                    rfile.close();
                } catch (IOException e) {
                    log.error("{}", e.getMessage(), e);
                }
            }
        }
    }
    return greatest;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileSystem(org.apache.hadoop.fs.FileSystem) TabletFile(org.apache.accumulo.core.metadata.TabletFile) IOException(java.io.IOException) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 14 with TabletFile

use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

the class FileUtil method reduceFiles.

/**
 * Repeatedly merges groups of at most {@code maxFiles} input files into temporary files until no
 * more than {@code maxFiles} files remain.
 *
 * <p>
 * Each pass writes its output under {@code tmpDir/pass_NNNN}. For every group, index readers are
 * opened on the inputs, merged, and each key whose row is after {@code prevEndRow} and at or
 * before {@code endRow} is appended (with an empty value) to the group's output file. The method
 * then recurses on the output files with {@code pass + 1}.
 *
 * @param context server context supplying the configuration, volume manager and crypto service
 * @param conf unused by this method; kept for interface compatibility
 * @param prevEndRow exclusive lower row bound, or {@code null} for no lower bound
 * @param endRow inclusive upper row bound, or {@code null} for no upper bound
 * @param mapFiles files to reduce
 * @param maxFiles maximum number of files to return and to merge per group; must be at least 2
 *        whenever more than {@code maxFiles} files are supplied
 * @param tmpDir directory under which per-pass output directories are created
 * @param pass zero-based pass number, used to name the output directory
 * @return a copy of {@code mapFiles} if it already has at most {@code maxFiles} entries,
 *         otherwise the files produced by the final pass
 * @throws IllegalArgumentException if a reduction is required but {@code maxFiles} is less than 2
 * @throws IOException if reading an input, writing an output, or closing the writer fails
 */
public static Collection<TabletFile> reduceFiles(ServerContext context, Configuration conf, Text prevEndRow, Text endRow, Collection<TabletFile> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
    AccumuloConfiguration acuConf = context.getConfiguration();
    ArrayList<TabletFile> paths = new ArrayList<>(mapFiles);
    if (paths.size() <= maxFiles)
        return paths;
    // Groups of fewer than two files can never shrink the file count: maxFiles == 1 would
    // recurse forever and maxFiles <= 0 would never advance the loop below.
    if (maxFiles < 2)
        throw new IllegalArgumentException("maxFiles must be at least 2 to reduce " + paths.size() + " files, was " + maxFiles);
    String newDir = String.format("%s/pass_%04d", tmpDir, pass);
    int start = 0;
    ArrayList<TabletFile> outFiles = new ArrayList<>();
    int count = 0;
    while (start < paths.size()) {
        int end = Math.min(maxFiles + start, paths.size());
        List<TabletFile> inFiles = paths.subList(start, end);
        start = end;
        TabletFile newMapFile = new TabletFile(new Path(String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION)));
        outFiles.add(newMapFile);
        FileSystem ns = context.getVolumeManager().getFileSystemByPath(newMapFile.getPath());
        FileSKVWriter writer = new RFileOperations().newWriterBuilder().forFile(newMapFile.getPathStr(), ns, ns.getConf(), context.getCryptoService()).withTableConfiguration(acuConf).build();
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
        try {
            // started inside the try so the writer is closed even if this call fails
            writer.startDefaultLocalityGroup();
            for (TabletFile file : inFiles) {
                ns = context.getVolumeManager().getFileSystemByPath(file.getPath());
                // every opened reader goes straight into iters, which is the single place
                // readers are closed from, so no reader is closed twice
                iters.add(FileOperations.getInstance().newIndexReaderBuilder().forFile(file.getPathStr(), ns, ns.getConf(), context.getCryptoService()).withTableConfiguration(acuConf).build());
            }
            MultiIterator mmfi = new MultiIterator(iters, true);
            while (mmfi.hasTop()) {
                Key key = mmfi.getTopKey();
                boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
                boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
                if (gtPrevEndRow && lteEndRow)
                    writer.append(key, new Value());
                // keys arrive in sorted order, so nothing after this one can be in range
                if (!lteEndRow)
                    break;
                mmfi.next();
            }
        } finally {
            for (SortedKeyValueIterator<Key, Value> r : iters) {
                try {
                    if (r != null)
                        ((FileSKVIterator) r).close();
                } catch (IOException e) {
                    // continue closing
                    log.error("{}", e.getMessage(), e);
                }
            }
            try {
                writer.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
                throw e;
            }
        }
    }
    return reduceFiles(context, conf, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iteratorsImpl.system.MultiIterator) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) TabletFile(org.apache.accumulo.core.metadata.TabletFile) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Example 15 with TabletFile

use of org.apache.accumulo.core.metadata.TabletFile in project accumulo by apache.

the class Tablet method split.

/**
 * Splits this tablet into a "low" and a "high" tablet, either at the supplied row or, when
 * {@code sp} is {@code null}, at a row chosen by {@code findSplitRow}.
 *
 * <p>
 * The tablet is closed as part of the split: {@code initiateClose} is called up front and
 * {@code completeClose} once a split row is known. If no split row can be determined the close
 * state is reset to {@code OPEN} and nothing is changed.
 *
 * @param sp the row to split on, or {@code null} to let the tablet pick one
 * @return a map holding the two new extents and their tablet data, or {@code null} if the split
 *         was not performed (the close could not be initiated, or no split row was found)
 * @throws IllegalArgumentException if {@code sp} equals this tablet's end row
 * @throws IOException if {@code sp} is longer than the configured
 *         {@code TABLE_MAX_END_ROW_SIZE}
 * @throws RuntimeException if this is the root tablet
 */
public TreeMap<KeyExtent, TabletData> split(byte[] sp) throws IOException {
    // splitting on the existing end row would not divide the tablet
    if (sp != null && extent.endRow() != null && extent.endRow().equals(new Text(sp))) {
        throw new IllegalArgumentException("Attempting to split on EndRow " + extent.endRow() + " for " + extent);
    }
    // reject split points larger than the table's configured maximum end row size
    if (sp != null && sp.length > tableConfiguration.getAsBytes(Property.TABLE_MAX_END_ROW_SIZE)) {
        String msg = "Cannot split tablet " + extent + ", selected split point too long.  Length :  " + sp.length;
        log.warn(msg);
        throw new IOException(msg);
    }
    if (extent.isRootTablet()) {
        String msg = "Cannot split root tablet";
        log.warn(msg);
        throw new RuntimeException(msg);
    }
    // an IllegalStateException from initiateClose means the split is skipped, not failed
    try {
        initiateClose(true);
    } catch (IllegalStateException ise) {
        log.debug("File {} not splitting : {}", extent, ise.getMessage());
        return null;
    }
    // obtain this info outside of synch block since it will involve opening
    // the map files... it is ok if the set of map files changes, because
    // this info is used for optimization... it is ok if map files are missing
    // from the set... can still query and insert into the tablet while this
    // map file operation is happening
    Map<TabletFile, FileUtil.FileInfo> firstAndLastRows = FileUtil.tryToGetFirstAndLastRows(context, getDatafileManager().getFiles());
    synchronized (this) {
        // java needs tuples ...
        TreeMap<KeyExtent, TabletData> newTablets = new TreeMap<>();
        long t1 = System.currentTimeMillis();
        // choose a split point
        SplitRowSpec splitPoint;
        if (sp == null) {
            splitPoint = findSplitRow(getDatafileManager().getFiles());
        } else {
            // caller supplied the row; only the split ratio has to be estimated from the files
            Text tsp = new Text(sp);
            splitPoint = new SplitRowSpec(FileUtil.estimatePercentageLTE(context, chooseTabletDir(), extent.prevEndRow(), extent.endRow(), getDatafileManager().getFiles(), tsp), tsp);
        }
        if (splitPoint == null || splitPoint.row == null) {
            log.info("had to abort split because splitRow was null");
            // undo the close initiated above so the tablet is marked open again
            closeState = CloseState.OPEN;
            return null;
        }
        closeState = CloseState.CLOSING;
        completeClose(true, false);
        Text midRow = splitPoint.row;
        double splitRatio = splitPoint.splitRatio;
        // NOTE(review): presumably KeyExtent(tableId, endRow, prevEndRow), i.e. low ends at midRow
        // and high starts after midRow - confirm against KeyExtent
        KeyExtent low = new KeyExtent(extent.tableId(), midRow, extent.prevEndRow());
        KeyExtent high = new KeyExtent(extent.tableId(), extent.endRow(), midRow);
        // the low tablet gets a new directory name; the high tablet reuses dirName (see below)
        String lowDirectoryName = createTabletDirectoryName(context, midRow);
        // write new tablet information to MetadataTable
        SortedMap<StoredTabletFile, DataFileValue> lowDatafileSizes = new TreeMap<>();
        SortedMap<StoredTabletFile, DataFileValue> highDatafileSizes = new TreeMap<>();
        List<StoredTabletFile> highDatafilesToRemove = new ArrayList<>();
        // fills the three collections above from the current data file sizes
        MetadataTableUtil.splitDatafiles(midRow, splitRatio, firstAndLastRows, getDatafileManager().getDatafileSizes(), lowDatafileSizes, highDatafileSizes, highDatafilesToRemove);
        log.debug("Files for low split {} {}", low, lowDatafileSizes.keySet());
        log.debug("Files for high split {} {}", high, highDatafileSizes.keySet());
        MetadataTime time = tabletTime.getMetadataTime();
        HashSet<ExternalCompactionId> ecids = new HashSet<>();
        compactable.getExternalCompactionIds(ecids::add);
        // metadata updates are issued in this fixed order: splitTablet, addNewTablet, finishSplit
        MetadataTableUtil.splitTablet(high, extent.prevEndRow(), splitRatio, getTabletServer().getContext(), getTabletServer().getLock(), ecids);
        ManagerMetadataUtil.addNewTablet(getTabletServer().getContext(), low, lowDirectoryName, getTabletServer().getTabletSession(), lowDatafileSizes, bulkImported, time, lastFlushID, lastCompactID, getTabletServer().getLock());
        MetadataTableUtil.finishSplit(high, highDatafileSizes, highDatafilesToRemove, getTabletServer().getContext(), getTabletServer().getLock());
        TabletLogger.split(extent, low, high, getTabletServer().getTabletSession());
        // both new tablets share this tablet's time, flush/compact ids, last location and bulk imports
        newTablets.put(high, new TabletData(dirName, highDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, bulkImported));
        newTablets.put(low, new TabletData(lowDirectoryName, lowDatafileSizes, time, lastFlushID, lastCompactID, lastLocation, bulkImported));
        long t2 = System.currentTimeMillis();
        log.debug(String.format("offline split time : %6.2f secs", (t2 - t1) / 1000.0));
        closeState = CloseState.COMPLETE;
        return newTablets;
    }
}
Also used : ExternalCompactionId(org.apache.accumulo.core.metadata.schema.ExternalCompactionId) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) MapFileInfo(org.apache.accumulo.core.dataImpl.thrift.MapFileInfo) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) TabletFile(org.apache.accumulo.core.metadata.TabletFile) HashSet(java.util.HashSet) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) StoredTabletFile(org.apache.accumulo.core.metadata.StoredTabletFile) MetadataTime(org.apache.accumulo.core.metadata.schema.MetadataTime)

Aggregations

TabletFile (org.apache.accumulo.core.metadata.TabletFile)36 StoredTabletFile (org.apache.accumulo.core.metadata.StoredTabletFile)20 IOException (java.io.IOException)17 Path (org.apache.hadoop.fs.Path)15 ArrayList (java.util.ArrayList)14 DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue)14 Key (org.apache.accumulo.core.data.Key)13 FileSystem (org.apache.hadoop.fs.FileSystem)13 HashMap (java.util.HashMap)12 TreeMap (java.util.TreeMap)10 Value (org.apache.accumulo.core.data.Value)8 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)8 HashSet (java.util.HashSet)6 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)6 FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator)6 SortedKeyValueIterator (org.apache.accumulo.core.iterators.SortedKeyValueIterator)6 Text (org.apache.hadoop.io.Text)6 Map (java.util.Map)5 PartialKey (org.apache.accumulo.core.data.PartialKey)5 List (java.util.List)4