Example 26 with FileChannel

use of java.nio.channels.FileChannel in project druid by druid-io.

the class LimitedTemporaryStorage method createFile.

/**
   * Create a new temporary file. All methods of the returned output stream may throw
   * {@link TemporaryStorageFullException} if the temporary storage area fills up.
   *
   * @return output stream to the file
   *
   * @throws TemporaryStorageFullException if the temporary storage area is full
   * @throws IOException                   if something goes wrong while creating the file
   */
public LimitedOutputStream createFile() throws IOException {
    if (bytesUsed.get() >= maxBytesUsed) {
        throw new TemporaryStorageFullException(maxBytesUsed);
    }
    synchronized (files) {
        if (closed) {
            throw new ISE("Closed");
        }
        FileUtils.forceMkdir(storageDirectory);
        final File theFile = new File(storageDirectory, String.format("%08d.tmp", files.size()));
        final EnumSet<StandardOpenOption> openOptions = EnumSet.of(StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE);
        final FileChannel channel = FileChannel.open(theFile.toPath(), openOptions);
        files.add(theFile);
        return new LimitedOutputStream(theFile, Channels.newOutputStream(channel));
    }
}
Also used : StandardOpenOption(java.nio.file.StandardOpenOption) FileChannel(java.nio.channels.FileChannel) ISE(io.druid.java.util.common.ISE) File(java.io.File)
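
A minimal, standalone sketch of the same channel-backed stream pattern (the file name and payload are placeholders, not Druid code). CREATE_NEW makes the create atomic: if the file already exists, the open fails fast with FileAlreadyExistsException instead of silently overwriting.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.EnumSet;

public class CreateNewFileDemo {
    public static void main(String[] args) throws IOException {
        // Same options as LimitedTemporaryStorage.createFile: fail if the file exists, open for writing.
        EnumSet<StandardOpenOption> openOptions =
                EnumSet.of(StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE);
        try (FileChannel channel = FileChannel.open(Paths.get("00000000.tmp"), openOptions);
                OutputStream out = Channels.newOutputStream(channel)) {
            // Writes go through the channel; closing the stream closes the channel too.
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}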

Example 27 with FileChannel

use of java.nio.channels.FileChannel in project druid by druid-io.

the class IndexMerger method makeIndexFiles.

protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
    List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {

        @Nullable
        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
    final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
    for (IndexableAdapter adapter : indexes) {
        for (String dimension : adapter.getDimensionNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
            ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
        }
        for (String metric : adapter.getMetricNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
            ColumnCapabilities capabilities = adapter.getCapabilities(metric);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
            valueTypes.put(metric, capabilities.getType());
            metricTypeNames.put(metric, adapter.getMetricType(metric));
        }
    }
    for (String dimension : mergedDimensions) {
        dimCapabilities.add(columnCapabilities.get(dimension));
    }
    Closer closer = Closer.create();
    try {
        final Interval dataInterval;
        final File v8OutDir = new File(outDir, "v8-tmp");
        FileUtils.forceMkdir(v8OutDir);
        registerDeleteDirectory(closer, v8OutDir);
        File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
        closer.register(ioPeon);
        /*************  Main index.drd file **************/
        progress.progress();
        long startTime = System.currentTimeMillis();
        File indexFile = new File(v8OutDir, "index.drd");
        try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
            FileChannel channel = fileOutputStream.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
            GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
            DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
            for (IndexableAdapter index : indexes) {
                minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
                maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
            }
            dataInterval = new Interval(minTime, maxTime);
            serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
            serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
        }
        IndexIO.checkFileSize(indexFile);
        log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
            mergers.add(merger);
            merger.writeMergedValueMetadata(indexes);
            FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
            merger.writeValueMetadataToFile(dimOut);
            dimOuts.add(dimOut);
        }
        log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Walk through data sets and merge them *************/
        progress.progress();
        startTime = System.currentTimeMillis();
        Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
        timeWriter.open();
        ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
        final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
        final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
        for (String metric : mergedMetrics) {
            ValueType type = valueTypes.get(metric);
            switch(type) {
                case LONG:
                    metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
                    break;
                case FLOAT:
                    metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
                    break;
                case COMPLEX:
                    final String typeName = metricTypeNames.get(metric);
                    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
                    if (serde == null) {
                        throw new ISE("Unknown type[%s]", typeName);
                    }
                    metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
                    break;
                default:
                    throw new ISE("Unknown type[%s]", type);
            }
        }
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.open();
        }
        int rowCount = 0;
        long time = System.currentTimeMillis();
        List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
        for (IndexableAdapter index : indexes) {
            int[] arr = new int[index.getNumRows()];
            Arrays.fill(arr, INVALID_ROW);
            rowNumConversions.add(IntBuffer.wrap(arr));
        }
        for (Rowboat theRow : theRows) {
            progress.progress();
            timeWriter.add(theRow.getTimestamp());
            final Object[] metrics = theRow.getMetrics();
            for (int i = 0; i < metrics.length; ++i) {
                metWriters.get(i).serialize(metrics[i]);
            }
            Object[] dims = theRow.getDims();
            for (int i = 0; i < dims.length; ++i) {
                mergers.get(i).processMergedRow(dims[i]);
            }
            for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
                final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
                for (Integer rowNum : comprisedRow.getValue()) {
                    while (conversionBuffer.position() < rowNum) {
                        conversionBuffer.put(INVALID_ROW);
                    }
                    conversionBuffer.put(rowCount);
                }
            }
            if ((++rowCount % 500000) == 0) {
                log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
                time = System.currentTimeMillis();
            }
        }
        for (IntBuffer rowNumConversion : rowNumConversions) {
            rowNumConversion.rewind();
        }
        final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
        timeFile.delete();
        ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
        timeWriter.closeAndConsolidate(out);
        IndexIO.checkFileSize(timeFile);
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.close();
        }
        log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
        /************ Create Inverted Indexes and Finalize Columns *************/
        startTime = System.currentTimeMillis();
        final File invertedFile = new File(v8OutDir, "inverted.drd");
        Files.touch(invertedFile);
        out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
        final File geoFile = new File(v8OutDir, "spatial.drd");
        Files.touch(geoFile);
        OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
            legacyMerger.writeIndexes(rowNumConversions, closer);
            legacyMerger.writeIndexesToFiles(out, spatialOut);
            legacyMerger.writeRowValuesToFile(dimOuts.get(i));
        }
        log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        final Function<String, String> dimFilenameFunction = new Function<String, String>() {

            @Override
            public String apply(@Nullable String input) {
                String formatString;
                if (columnCapabilities.get(input).isDictionaryEncoded()) {
                    formatString = "dim_%s.drd";
                } else {
                    formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
                }
                return GuavaUtils.formatFunction(formatString).apply(input);
            }
        };
        final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
        if (segmentMetadata != null) {
            writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
            log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
            expectedFiles.add("metadata.drd");
        }
        Map<String, File> files = Maps.newLinkedHashMap();
        for (String fileName : expectedFiles) {
            files.put(fileName, new File(v8OutDir, fileName));
        }
        File smooshDir = new File(v8OutDir, "smoosher");
        FileUtils.forceMkdir(smooshDir);
        for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
            entry.getValue().delete();
        }
        for (File file : smooshDir.listFiles()) {
            Files.move(file, new File(v8OutDir, file.getName()));
        }
        if (!smooshDir.delete()) {
            log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
            throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
        }
        createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
        indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) CompressedObjectStrategy(io.druid.segment.data.CompressedObjectStrategy) DateTime(org.joda.time.DateTime) ComplexMetricColumnSerializer(io.druid.segment.serde.ComplexMetricColumnSerializer) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) TreeSet(java.util.TreeSet) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File) Map(java.util.Map) Nullable(javax.annotation.Nullable) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) Interval(org.joda.time.Interval) ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities) Function(com.google.common.base.Function) ByteSink(com.google.common.io.ByteSink) ISE(io.druid.java.util.common.ISE) ComplexMetricColumnSerializer(io.druid.segment.serde.ComplexMetricColumnSerializer) Closer(com.google.common.io.Closer) ValueType(io.druid.segment.column.ValueType) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CompressionFactory(io.druid.segment.data.CompressionFactory) FileOutputSupplier(io.druid.common.guava.FileOutputSupplier) LongSupplierSerializer(io.druid.segment.data.LongSupplierSerializer)
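
The FileChannel usage in this long method is concentrated in the index.drd block: get the channel from a FileOutputStream, write a one-byte version header, then append serialized values. A minimal sketch of that header-writing shape, assuming a length-prefixed UTF-8 encoding for strings (Druid's serializerUtils.writeString may differ in detail; the file name, version byte, and payload here are placeholders):

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;

public class HeaderWriteDemo {
    public static void main(String[] args) throws IOException {
        try (FileOutputStream fileOutputStream = new FileOutputStream("index-demo.drd");
                FileChannel channel = fileOutputStream.getChannel()) {
            // One version byte up front, mirroring channel.write(ByteBuffer.wrap(new byte[]{V8_VERSION})).
            channel.write(ByteBuffer.wrap(new byte[] { (byte) 8 }));
            // A length-prefixed UTF-8 string, standing in for serializerUtils.writeString.
            byte[] payload = "2011-01-01/2011-01-02".getBytes(StandardCharsets.UTF_8);
            ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES + payload.length);
            buf.putInt(payload.length).put(payload);
            buf.flip();
            channel.write(buf);
        }
    }
}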

Example 28 with FileChannel

use of java.nio.channels.FileChannel in project druid by druid-io.

the class IndexMerger method createIndexDrdFile.

public void createIndexDrdFile(byte versionId, File inDir, GenericIndexed<String> availableDimensions, GenericIndexed<String> availableMetrics, Interval dataInterval, BitmapSerdeFactory bitmapSerdeFactory) throws IOException {
    File indexFile = new File(inDir, "index.drd");
    try (FileChannel channel = new FileOutputStream(indexFile).getChannel()) {
        channel.write(ByteBuffer.wrap(new byte[] { versionId }));
        availableDimensions.writeToChannel(channel);
        availableMetrics.writeToChannel(channel);
        serializerUtils.writeString(channel, String.format("%s/%s", dataInterval.getStart(), dataInterval.getEnd()));
        serializerUtils.writeString(channel, mapper.writeValueAsString(bitmapSerdeFactory));
    }
    IndexIO.checkFileSize(indexFile);
}
Also used : FileChannel(java.nio.channels.FileChannel) FileOutputStream(java.io.FileOutputStream) File(java.io.File)
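
Note that the try-with-resources here holds only the FileChannel; closing it also closes the FileOutputStream it came from. A pure-NIO sketch of the same open, assuming the caller wants FileOutputStream's create-or-truncate semantics (the file name and version byte are placeholders):

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.StandardOpenOption;

public class OpenForOverwriteDemo {
    public static void main(String[] args) throws IOException {
        File indexFile = new File("index-demo.drd");
        byte versionId = 9;
        // CREATE + WRITE + TRUNCATE_EXISTING is what new FileOutputStream(file) gives you:
        // create the file if absent, otherwise truncate it before writing.
        try (FileChannel channel = FileChannel.open(indexFile.toPath(),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE,
                StandardOpenOption.TRUNCATE_EXISTING)) {
            channel.write(ByteBuffer.wrap(new byte[] { versionId }));
        }
    }
}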

Example 29 with FileChannel

use of java.nio.channels.FileChannel in project buck by facebook.

the class AbstractElfDynamicSectionScrubberStep method execute.

@Override
public StepExecutionResult execute(ExecutionContext context) throws IOException {
    try (FileChannel channel = FileChannel.open(getFilesystem().resolve(getPath()), StandardOpenOption.READ, StandardOpenOption.WRITE)) {
        MappedByteBuffer buffer = channel.map(READ_WRITE, 0, channel.size());
        Elf elf = new Elf(buffer);
        Optional<ElfSection> section = elf.getSectionByName(SECTION).map(Pair::getSecond);
        if (!section.isPresent()) {
            throw new IOException(String.format("Error parsing ELF file %s: no such section \"%s\"", getPath(), SECTION));
        }
        for (ByteBuffer body = section.get().body; body.hasRemaining(); ) {
            ElfDynamicSection.DTag dTag = ElfDynamicSection.DTag.valueOf(elf.header.ei_class == ElfHeader.EIClass.ELFCLASS32 ? Elf.Elf32.getElf32Sword(body) : (int) Elf.Elf64.getElf64Sxword(body));
            if (!WHITELISTED_TAGS.contains(dTag)) {
                if (elf.header.ei_class == ElfHeader.EIClass.ELFCLASS32) {
                    // d_ptr
                    Elf.Elf32.putElf32Addr(body, 0);
                } else {
                    // d_ptr
                    Elf.Elf64.putElf64Addr(body, 0);
                }
            } else {
                if (elf.header.ei_class == ElfHeader.EIClass.ELFCLASS32) {
                    // d_ptr
                    Elf.Elf32.getElf32Addr(body);
                } else {
                    // d_ptr
                    Elf.Elf64.getElf64Addr(body);
                }
            }
        }
    }
    return StepExecutionResult.SUCCESS;
}
Also used : ElfDynamicSection(com.facebook.buck.cxx.elf.ElfDynamicSection) MappedByteBuffer(java.nio.MappedByteBuffer) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) ElfSection(com.facebook.buck.cxx.elf.ElfSection) ByteBuffer(java.nio.ByteBuffer) MappedByteBuffer(java.nio.MappedByteBuffer) Elf(com.facebook.buck.cxx.elf.Elf) Pair(com.facebook.buck.model.Pair)
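
The FileChannel idiom here is open READ+WRITE, then map READ_WRITE, so edits to the MappedByteBuffer land directly in the file. A minimal sketch of just that idiom, with a hypothetical file name and a trivial in-place edit standing in for the ELF-specific d_ptr zeroing:

import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public class MapReadWriteDemo {
    public static void main(String[] args) throws IOException {
        try (FileChannel channel = FileChannel.open(Paths.get("scratch.bin"),
                StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, channel.size());
            if (buffer.remaining() >= 4) {
                // Zero the first four bytes in place, the way the scrubber zeroes d_ptr fields.
                buffer.putInt(0, 0);
            }
            // Ask the OS to flush the mapped changes back to the file.
            buffer.force();
        }
    }
}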

Example 30 with FileChannel

use of java.nio.channels.FileChannel in project buck by facebook.

the class AbstractElfExtractSectionsStep method getNewSectionAddresses.

// We want to compact the sections into the new ELF file, so find out the new addresses of each
// section.
private ImmutableMap<String, Long> getNewSectionAddresses() throws IOException {
    ImmutableMap.Builder<String, Long> addresses = ImmutableMap.builder();
    try (FileChannel channel = FileChannel.open(getFilesystem().resolve(getInput()), StandardOpenOption.READ)) {
        MappedByteBuffer buffer = channel.map(READ_ONLY, 0, channel.size());
        Elf elf = new Elf(buffer);
        // We start placing sections right after the program headers.
        long end = elf.header.e_phoff + elf.header.e_phnum * elf.header.e_phentsize;
        for (int index = 0; index < elf.getNumberOfSections(); index++) {
            ElfSection section = elf.getSectionByIndex(index);
            String name = elf.getSectionName(section.header);
            // If this is one of the sections being extracted, assign it the current end
            // address, then bump the end address by this section's size.
            if (getSections().contains(name)) {
                addresses.put(name, end);
                end += section.header.sh_size;
            }
        }
    }
    return addresses.build();
}
Also used : MappedByteBuffer(java.nio.MappedByteBuffer) FileChannel(java.nio.channels.FileChannel) ElfSection(com.facebook.buck.cxx.elf.ElfSection) ImmutableMap(com.google.common.collect.ImmutableMap) Elf(com.facebook.buck.cxx.elf.Elf)
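
This one maps the file READ_ONLY, so the scan cannot mutate its input. A minimal sketch of the read-only mapping idiom, with a hypothetical file name, that just checks the four ELF magic bytes (0x7f 'E' 'L' 'F') instead of walking section headers:

import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public class MapReadOnlyDemo {
    public static void main(String[] args) throws IOException {
        try (FileChannel channel = FileChannel.open(Paths.get("a.out"),
                StandardOpenOption.READ)) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            if (buffer.remaining() >= 4) {
                byte[] magic = new byte[4];
                buffer.get(magic);
                boolean isElf = magic[0] == 0x7f && magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F';
                System.out.println("ELF? " + isElf);
            }
        }
    }
}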

Aggregations

FileChannel (java.nio.channels.FileChannel): 629
IOException (java.io.IOException): 227
ByteBuffer (java.nio.ByteBuffer): 205
File (java.io.File): 185
FileInputStream (java.io.FileInputStream): 164
FileOutputStream (java.io.FileOutputStream): 147
RandomAccessFile (java.io.RandomAccessFile): 144
Test (org.junit.Test): 95
MappedByteBuffer (java.nio.MappedByteBuffer): 78
Path (java.nio.file.Path): 37
FileLock (java.nio.channels.FileLock): 32
FileNotFoundException (java.io.FileNotFoundException): 29
Random (java.util.Random): 12
OutputStream (java.io.OutputStream): 11
ArrayList (java.util.ArrayList): 11
AsynchronousFileChannel (java.nio.channels.AsynchronousFileChannel): 10
OverlappingFileLockException (java.nio.channels.OverlappingFileLockException): 10
LinkedList (java.util.LinkedList): 10
ProjectWorkspace (com.facebook.buck.testutil.integration.ProjectWorkspace): 9
BufferedReader (java.io.BufferedReader): 9