
Example 1 with RFileOperations

Use of org.apache.accumulo.core.file.rfile.RFileOperations in the apache/accumulo project.

From the class InMemoryMap, the method delete:

public void delete(long waitTime) {
    synchronized (this) {
        if (deleted)
            throw new IllegalStateException("Double delete");
        deleted = true;
    }
    long t1 = System.currentTimeMillis();
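    // wait up to waitTime for active iterators to finish before falling back to a disk dump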
    while (activeIters.size() > 0 && System.currentTimeMillis() - t1 < waitTime) {
        sleepUninterruptibly(50, TimeUnit.MILLISECONDS);
    }
    if (activeIters.size() > 0) {
        // dump memmap exactly as is to a tmp file on disk, and switch scans to that temp file
        try {
            Configuration conf = CachedConfiguration.getInstance();
            FileSystem fs = FileSystem.getLocal(conf);
            String tmpFile = memDumpDir + "/memDump" + UUID.randomUUID() + "." + RFile.EXTENSION;
            Configuration newConf = new Configuration(conf);
            newConf.setInt("io.seqfile.compress.blocksize", 100000);
            AccumuloConfiguration siteConf = SiteConfiguration.getInstance();
            if (getOrCreateSampler() != null) {
                siteConf = createSampleConfig(siteConf);
            }
            FileSKVWriter out = new RFileOperations().newWriterBuilder()
                    .forFile(tmpFile, fs, newConf)
                    .withTableConfiguration(siteConf).build();
            InterruptibleIterator iter = map.skvIterator(null);
            HashSet<ByteSequence> allfams = new HashSet<>();
            for (Entry<String, Set<ByteSequence>> entry : lggroups.entrySet()) {
                allfams.addAll(entry.getValue());
                out.startNewLocalityGroup(entry.getKey(), entry.getValue());
                iter.seek(new Range(), entry.getValue(), true);
                dumpLocalityGroup(out, iter);
            }
            out.startDefaultLocalityGroup();
            iter.seek(new Range(), allfams, false);
            dumpLocalityGroup(out, iter);
            out.close();
            log.debug("Created mem dump file {}", tmpFile);
            memDumpFile = tmpFile;
            synchronized (activeIters) {
                for (MemoryIterator mi : activeIters) {
                    mi.switchNow();
                }
            }
            // rely on unix behavior that file will be deleted when last
            // reader closes it
            fs.delete(new Path(memDumpFile), true);
        } catch (IOException ioe) {
            log.error("Failed to create mem dump file", ioe);
            while (activeIters.size() > 0) {
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            }
        }
    }
    SimpleMap tmpMap = map;
    synchronized (this) {
        map = null;
    }
    tmpMap.delete();
}
Also used : Path(org.apache.hadoop.fs.Path) Set(java.util.Set) HashSet(java.util.HashSet) Configuration(org.apache.hadoop.conf.Configuration) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) SiteConfiguration(org.apache.accumulo.core.conf.SiteConfiguration) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) InterruptibleIterator(org.apache.accumulo.core.iterators.system.InterruptibleIterator) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) ByteSequence(org.apache.accumulo.core.data.ByteSequence) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) HashSet(java.util.HashSet)
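
The essential write path in the dump above is the writer itself: build it with RFileOperations, start each named locality group, append its entries, then finish with the default group and close. Below is a minimal sketch of just that path, reusing the builder calls shown above; the output path, the entry sources, and the use of DefaultConfiguration are placeholder assumptions, not what InMemoryMap actually does.

import java.util.Map.Entry;
import java.util.Set;

import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.file.rfile.RFileOperations;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class MemDumpSketch {
    // Writes one named locality group followed by the default group.
    // The path and entry sources are placeholders; entries must already be in sorted key order.
    static void dump(String group, Set<ByteSequence> fams,
            Iterable<Entry<Key, Value>> groupEntries,
            Iterable<Entry<Key, Value>> defaultEntries) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        FileSKVWriter out = new RFileOperations().newWriterBuilder()
                .forFile("/tmp/memDump.rf", fs, conf)
                .withTableConfiguration(DefaultConfiguration.getInstance())
                .build();
        out.startNewLocalityGroup(group, fams);
        for (Entry<Key, Value> e : groupEntries)
            out.append(e.getKey(), e.getValue());
        out.startDefaultLocalityGroup();
        for (Entry<Key, Value> e : defaultEntries)
            out.append(e.getKey(), e.getValue());
        out.close();
    }
}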

Example 2 with RFileOperations

Use of org.apache.accumulo.core.file.rfile.RFileOperations in the apache/accumulo project.

From the class FileUtil, the method reduceFiles:

public static Collection<String> reduceFiles(AccumuloConfiguration acuConf, Configuration conf, VolumeManager fs, Text prevEndRow, Text endRow, Collection<String> mapFiles, int maxFiles, Path tmpDir, int pass) throws IOException {
    ArrayList<String> paths = new ArrayList<>(mapFiles);
    if (paths.size() <= maxFiles)
        return paths;
    String newDir = String.format("%s/pass_%04d", tmpDir, pass);
    int start = 0;
    ArrayList<String> outFiles = new ArrayList<>();
    int count = 0;
    while (start < paths.size()) {
        int end = Math.min(maxFiles + start, paths.size());
        List<String> inFiles = paths.subList(start, end);
        start = end;
        String newMapFile = String.format("%s/%04d.%s", newDir, count++, RFile.EXTENSION);
        outFiles.add(newMapFile);
        FileSystem ns = fs.getVolumeByPath(new Path(newMapFile)).getFileSystem();
        FileSKVWriter writer = new RFileOperations().newWriterBuilder()
                .forFile(newMapFile, ns, ns.getConf())
                .withTableConfiguration(acuConf).build();
        writer.startDefaultLocalityGroup();
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(inFiles.size());
        FileSKVIterator reader = null;
        try {
            for (String s : inFiles) {
                ns = fs.getVolumeByPath(new Path(s)).getFileSystem();
                reader = FileOperations.getInstance().newIndexReaderBuilder()
                        .forFile(s, ns, ns.getConf())
                        .withTableConfiguration(acuConf).build();
                iters.add(reader);
            }
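            // merge-sort the index entries from all of the input files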
            MultiIterator mmfi = new MultiIterator(iters, true);
            while (mmfi.hasTop()) {
                Key key = mmfi.getTopKey();
                boolean gtPrevEndRow = prevEndRow == null || key.compareRow(prevEndRow) > 0;
                boolean lteEndRow = endRow == null || key.compareRow(endRow) <= 0;
                if (gtPrevEndRow && lteEndRow)
                    writer.append(key, new Value(new byte[0]));
                if (!lteEndRow)
                    break;
                mmfi.next();
            }
        } finally {
            try {
                if (reader != null)
                    reader.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
            }
            for (SortedKeyValueIterator<Key, Value> r : iters) {
                try {
                    if (r != null)
                        ((FileSKVIterator) r).close();
                } catch (IOException e) {
                    // keep closing the remaining readers
                    log.error("{}", e.getMessage(), e);
                }
            }
            try {
                writer.close();
            } catch (IOException e) {
                log.error("{}", e.getMessage(), e);
                throw e;
            }
        }
    }
    return reduceFiles(acuConf, conf, fs, prevEndRow, endRow, outFiles, maxFiles, tmpDir, pass + 1);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) MultiIterator(org.apache.accumulo.core.iterators.system.MultiIterator) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) ArrayList(java.util.ArrayList) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) IOException(java.io.IOException) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
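
Stripped of volume lookups, row-range filtering, and the recursive passes, one reduction step amounts to: open an index reader on each input file, merge them with MultiIterator, and write the merged keys into a single new RFile. The sketch below shows that step using only the builder calls from the example; the file names and the use of DefaultConfiguration are placeholder assumptions.

import java.util.ArrayList;
import java.util.List;

import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.file.rfile.RFileOperations;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.system.MultiIterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ReducePassSketch {
    // One pass: merge the index keys of several input RFiles into a single output RFile.
    static void mergePass(List<String> inFiles, String outFile) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>();
        for (String in : inFiles) {
            FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder()
                    .forFile(in, fs, conf)
                    .withTableConfiguration(DefaultConfiguration.getInstance()).build();
            iters.add(reader);
        }
        FileSKVWriter writer = new RFileOperations().newWriterBuilder()
                .forFile(outFile, fs, conf)
                .withTableConfiguration(DefaultConfiguration.getInstance()).build();
        writer.startDefaultLocalityGroup();
        MultiIterator merged = new MultiIterator(iters, true); // merge-sorts the index keys
        while (merged.hasTop()) {
            writer.append(merged.getTopKey(), new Value(new byte[0]));
            merged.next();
        }
        writer.close();
        for (SortedKeyValueIterator<Key, Value> r : iters)
            ((FileSKVIterator) r).close();
    }
}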

Example 3 with RFileOperations

Use of org.apache.accumulo.core.file.rfile.RFileOperations in the apache/accumulo project.

From the class CreateRandomRFile, the method main:

public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage CreateRandomRFile <filename> <size>");
        System.exit(-1);
    }
    file = args[0];
    num = Integer.parseInt(args[1]);
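    // generate num random row ids and sort them, since RFile keys must be appended in sorted order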
    long[] rands = new long[num];
    Random r = new Random();
    for (int i = 0; i < rands.length; i++) {
        rands[i] = (r.nextLong() & 0x7fffffffffffffffL) % 10000000000L;
    }
    Arrays.sort(rands);
    Configuration conf = CachedConfiguration.getInstance();
    FileSKVWriter mfw;
    try {
        FileSystem fs = FileSystem.get(conf);
        mfw = new RFileOperations().newWriterBuilder()
                .forFile(file, fs, conf)
                .withTableConfiguration(DefaultConfiguration.getInstance()).build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    for (int i = 0; i < rands.length; i++) {
        Text row = new Text(String.format("row_%010d", rands[i]));
        Key key = new Key(row);
        Value dv = new Value(createValue(rands[i], 40));
        try {
            mfw.append(key, dv);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    try {
        mfw.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) Random(java.util.Random) CachedConfiguration(org.apache.accumulo.core.util.CachedConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DefaultConfiguration(org.apache.accumulo.core.conf.DefaultConfiguration) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) Key(org.apache.accumulo.core.data.Key)
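
To check what a run of this tool produced, the file can be read back through the same file operations layer. The sketch below assumes RFileOperations also exposes a reader builder (newReaderBuilder) mirroring the writer builder used above; if the exact builder differs, the seek-and-iterate loop is the part that carries over.

import java.util.Collections;

import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.rfile.RFileOperations;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class DumpRFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Assumes a reader builder analogous to the writer builder shown above.
        FileSKVIterator reader = new RFileOperations().newReaderBuilder()
                .forFile(args[0], fs, conf)
                .withTableConfiguration(DefaultConfiguration.getInstance())
                .build();
        // Seek over the whole key space with no column family filtering.
        reader.seek(new Range(), Collections.<ByteSequence>emptySet(), false);
        while (reader.hasTop()) {
            System.out.println(reader.getTopKey() + " -> " + reader.getTopValue());
            reader.next();
        }
        reader.close();
    }
}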

Example 4 with RFileOperations

Use of org.apache.accumulo.core.file.rfile.RFileOperations in the apache/accumulo project.

From the class DispatchingFileFactory, the method findFileFactory:

private FileOperations findFileFactory(FileAccessOperation<?> options) {
    String file = options.getFilename();
    Path p = new Path(file);
    String name = p.getName();
    if (name.startsWith(Constants.MAPFILE_EXTENSION + "_")) {
        return new MapFileOperations();
    }
    String[] sp = name.split("\\.");
    if (sp.length < 2) {
        throw new IllegalArgumentException("File name " + name + " has no extension");
    }
    String extension = sp[sp.length - 1];
    if (extension.equals(Constants.MAPFILE_EXTENSION) || extension.equals(Constants.MAPFILE_EXTENSION + "_tmp")) {
        return new MapFileOperations();
    } else if (extension.equals(RFile.EXTENSION) || extension.equals(RFile.EXTENSION + "_tmp")) {
        return new RFileOperations();
    } else {
        throw new IllegalArgumentException("File type " + extension + " not supported");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RFileOperations(org.apache.accumulo.core.file.rfile.RFileOperations) MapFileOperations(org.apache.accumulo.core.file.map.MapFileOperations)
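
The factory choice is driven entirely by the file name. Below is a self-contained sketch of the same extension check, reduced to the RFile branch (the map-file branches are analogous); the example path is hypothetical.

import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.rfile.RFile;
import org.apache.accumulo.core.file.rfile.RFileOperations;
import org.apache.hadoop.fs.Path;

public class DispatchSketch {
    // Reimplements only the RFile branch of the dispatch above.
    static FileOperations forName(String file) {
        String name = new Path(file).getName();
        String[] parts = name.split("\\.");
        if (parts.length < 2)
            throw new IllegalArgumentException("File name " + name + " has no extension");
        String ext = parts[parts.length - 1];
        // RFile.EXTENSION is "rf"; the "_tmp" variant covers files still being written
        if (ext.equals(RFile.EXTENSION) || ext.equals(RFile.EXTENSION + "_tmp"))
            return new RFileOperations();
        throw new IllegalArgumentException("File type " + ext + " not supported");
    }

    public static void main(String[] args) {
        // Prints RFileOperations for a typical tablet file name.
        System.out.println(forName("/accumulo/tables/1/t-001/F0000000.rf").getClass().getSimpleName());
    }
}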

Aggregations

RFileOperations (org.apache.accumulo.core.file.rfile.RFileOperations): 4
IOException (java.io.IOException): 3
FileSKVWriter (org.apache.accumulo.core.file.FileSKVWriter): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
Path (org.apache.hadoop.fs.Path): 3
Key (org.apache.accumulo.core.data.Key): 2
Value (org.apache.accumulo.core.data.Value): 2
CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
ArrayList (java.util.ArrayList): 1
HashSet (java.util.HashSet): 1
Random (java.util.Random): 1
Set (java.util.Set): 1
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration): 1
DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration): 1
SiteConfiguration (org.apache.accumulo.core.conf.SiteConfiguration): 1
ByteSequence (org.apache.accumulo.core.data.ByteSequence): 1
PartialKey (org.apache.accumulo.core.data.PartialKey): 1
Range (org.apache.accumulo.core.data.Range): 1
FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator): 1