Search in sources :

Example 1 with DefaultIterable

Use of voldemort.utils.DefaultIterable in the voldemort project, by voldemort.

The method sorted of the class ExternalSorter.

/**
 * Produce an iterable over the input values in sorted order.
 * <p>
 * The input is consumed in segments of at most {@code internalSortSize}
 * values. Each segment is sorted with {@code comparator} and written to its
 * own temp file in {@code tempDir} (gzip-compressed when {@code gzip} is
 * set). Segments are processed on a pool of {@code numThreads} threads; when
 * every pool thread is busy the calling thread sorts and writes the segment
 * itself, which throttles how fast the input is read. The returned iterable
 * wraps an {@code ExternalSorterIterator} over all segment files.
 * <p>
 * An empty input produces no segment files; the iterator is then created
 * over an empty file list with the full {@code bufferSize} as its read
 * buffer size.
 *
 * @param input An iterator over the input values
 * @return An iterable over the values
 * @throws VoldemortException if a segment could not be written to its temp
 *         file, regardless of which thread processed the segment
 * @throws RuntimeException if the calling thread is interrupted while
 *         waiting for the segments to complete (the interrupt flag is
 *         restored), or if sorting a segment fails
 */
public Iterable<V> sorted(Iterator<V> input) {
    ExecutorService executor = new ThreadPoolExecutor(this.numThreads, this.numThreads, 1000L, TimeUnit.MILLISECONDS, new SynchronousQueue<Runnable>(), new CallerRunsPolicy());
    final AtomicInteger count = new AtomicInteger(0);
    final List<File> tempFiles = Collections.synchronizedList(new ArrayList<File>());
    // Failures raised inside segment tasks. A task running on a pool thread
    // cannot throw to the caller, so failures are recorded here and
    // re-thrown once all tasks have finished.
    final List<RuntimeException> failures = Collections.synchronizedList(new ArrayList<RuntimeException>());
    try {
        while (input.hasNext()) {
            final int segmentId = count.getAndIncrement();
            final long segmentStartMs = System.currentTimeMillis();
            logger.info("Segment " + segmentId + ": filling sort buffer for segment...");
            @SuppressWarnings("unchecked") final V[] buffer = (V[]) new Object[internalSortSize];
            int segmentSizeIter = 0;
            for (; segmentSizeIter < internalSortSize && input.hasNext(); segmentSizeIter++) {
                buffer[segmentSizeIter] = input.next();
            }
            final int segmentSize = segmentSizeIter;
            logger.info("Segment " + segmentId + ": sort buffer filled...adding to sort queue.");
            // sort and write out asynchronously
            executor.execute(new Runnable() {

                public void run() {
                    try {
                        logger.info("Segment " + segmentId + ": sorting buffer.");
                        long start = System.currentTimeMillis();
                        Arrays.sort(buffer, 0, segmentSize, comparator);
                        long elapsed = System.currentTimeMillis() - start;
                        logger.info("Segment " + segmentId + ": sort completed in " + elapsed + " ms, writing to temp file.");
                        // write out values to a temp file
                        try {
                            File tempFile = File.createTempFile("segment-", ".dat", tempDir);
                            tempFile.deleteOnExit();
                            tempFiles.add(tempFile);
                            OutputStream os = new BufferedOutputStream(new FileOutputStream(tempFile), bufferSize);
                            try {
                                if (gzip)
                                    os = new GZIPOutputStream(os);
                                DataOutputStream output = new DataOutputStream(os);
                                for (int i = 0; i < segmentSize; i++) {
                                    writeValue(output, buffer[i]);
                                }
                                // push buffered bytes down before the
                                // underlying stream is closed below
                                output.flush();
                            } finally {
                                // always release the file handle, even when
                                // a write or the gzip wrapper fails
                                os.close();
                            }
                        } catch (IOException e) {
                            throw new VoldemortException(e);
                        }
                        long segmentElapsed = System.currentTimeMillis() - segmentStartMs;
                        logger.info("Segment " + segmentId + ": completed processing of segment in " + segmentElapsed + " ms.");
                    } catch (RuntimeException e) {
                        // record for the caller, then rethrow so a task run
                        // by the calling thread still fails immediately
                        failures.add(e);
                        throw e;
                    }
                }
            });
        }
    } finally {
        // stop accepting work on every path so pool threads are not leaked
        // when reading the input or a caller-run task throws
        executor.shutdown();
    }
    // wait for all sorting to complete
    try {
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        // restore the interrupt flag for code further up the stack
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
    // a failed segment means its temp file is missing or truncated; surface
    // the first failure instead of iterating over incomplete data
    if (!failures.isEmpty())
        throw failures.get(0);
    // split the buffer across the segment files; guard against an empty
    // input, which creates no files and would otherwise divide by zero
    int segmentCount = tempFiles.size();
    int readBufferSize = segmentCount == 0 ? bufferSize : bufferSize / segmentCount;
    // create iterator over sorted values
    return new DefaultIterable<V>(new ExternalSorterIterator(tempFiles, readBufferSize));
}
Also used : CallerRunsPolicy(java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy) DataOutputStream(java.io.DataOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) VoldemortException(voldemort.VoldemortException) GZIPOutputStream(java.util.zip.GZIPOutputStream) DefaultIterable(voldemort.utils.DefaultIterable) BufferedOutputStream(java.io.BufferedOutputStream) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FileOutputStream(java.io.FileOutputStream) ExecutorService(java.util.concurrent.ExecutorService) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) File(java.io.File)

Aggregations

BufferedOutputStream (java.io.BufferedOutputStream)1 DataOutputStream (java.io.DataOutputStream)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 ExecutorService (java.util.concurrent.ExecutorService)1 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)1 CallerRunsPolicy (java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 VoldemortException (voldemort.VoldemortException)1 DefaultIterable (voldemort.utils.DefaultIterable)1