
Example 26 with FileOutputStream

Use of java.io.FileOutputStream in project druid by druid-io.

From the class IndexMerger, method makeIndexFiles:

protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
    List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {

        @Nullable
        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
    final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
    for (IndexableAdapter adapter : indexes) {
        for (String dimension : adapter.getDimensionNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
            ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
        }
        for (String metric : adapter.getMetricNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
            ColumnCapabilities capabilities = adapter.getCapabilities(metric);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
            valueTypes.put(metric, capabilities.getType());
            metricTypeNames.put(metric, adapter.getMetricType(metric));
        }
    }
    for (String dimension : mergedDimensions) {
        dimCapabilities.add(columnCapabilities.get(dimension));
    }
    Closer closer = Closer.create();
    try {
        final Interval dataInterval;
        final File v8OutDir = new File(outDir, "v8-tmp");
        FileUtils.forceMkdir(v8OutDir);
        registerDeleteDirectory(closer, v8OutDir);
        File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
        closer.register(ioPeon);
        /*************  Main index.drd file **************/
        progress.progress();
        long startTime = System.currentTimeMillis();
        File indexFile = new File(v8OutDir, "index.drd");
        try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
            FileChannel channel = fileOutputStream.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
            GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
            DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
            for (IndexableAdapter index : indexes) {
                minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
                maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
            }
            dataInterval = new Interval(minTime, maxTime);
            serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
            serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
        }
        IndexIO.checkFileSize(indexFile);
        log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
            mergers.add(merger);
            merger.writeMergedValueMetadata(indexes);
            FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
            merger.writeValueMetadataToFile(dimOut);
            dimOuts.add(dimOut);
        }
        log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Walk through data sets and merge them *************/
        progress.progress();
        startTime = System.currentTimeMillis();
        Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
        timeWriter.open();
        ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
        final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
        final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
        for (String metric : mergedMetrics) {
            ValueType type = valueTypes.get(metric);
            switch(type) {
                case LONG:
                    metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
                    break;
                case FLOAT:
                    metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
                    break;
                case COMPLEX:
                    final String typeName = metricTypeNames.get(metric);
                    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
                    if (serde == null) {
                        throw new ISE("Unknown type[%s]", typeName);
                    }
                    metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
                    break;
                default:
                    throw new ISE("Unknown type[%s]", type);
            }
        }
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.open();
        }
        int rowCount = 0;
        long time = System.currentTimeMillis();
        List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
        for (IndexableAdapter index : indexes) {
            int[] arr = new int[index.getNumRows()];
            Arrays.fill(arr, INVALID_ROW);
            rowNumConversions.add(IntBuffer.wrap(arr));
        }
        for (Rowboat theRow : theRows) {
            progress.progress();
            timeWriter.add(theRow.getTimestamp());
            final Object[] metrics = theRow.getMetrics();
            for (int i = 0; i < metrics.length; ++i) {
                metWriters.get(i).serialize(metrics[i]);
            }
            Object[] dims = theRow.getDims();
            for (int i = 0; i < dims.length; ++i) {
                mergers.get(i).processMergedRow(dims[i]);
            }
            for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
                final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
                for (Integer rowNum : comprisedRow.getValue()) {
                    while (conversionBuffer.position() < rowNum) {
                        conversionBuffer.put(INVALID_ROW);
                    }
                    conversionBuffer.put(rowCount);
                }
            }
            if ((++rowCount % 500000) == 0) {
                log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
                time = System.currentTimeMillis();
            }
        }
        for (IntBuffer rowNumConversion : rowNumConversions) {
            rowNumConversion.rewind();
        }
        final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
        timeFile.delete();
        ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
        timeWriter.closeAndConsolidate(out);
        IndexIO.checkFileSize(timeFile);
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.close();
        }
        log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
        /************ Create Inverted Indexes and Finalize Columns *************/
        startTime = System.currentTimeMillis();
        final File invertedFile = new File(v8OutDir, "inverted.drd");
        Files.touch(invertedFile);
        out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
        final File geoFile = new File(v8OutDir, "spatial.drd");
        Files.touch(geoFile);
        OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
            legacyMerger.writeIndexes(rowNumConversions, closer);
            legacyMerger.writeIndexesToFiles(out, spatialOut);
            legacyMerger.writeRowValuesToFile(dimOuts.get(i));
        }
        log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        final Function<String, String> dimFilenameFunction = new Function<String, String>() {

            @Override
            public String apply(@Nullable String input) {
                String formatString;
                if (columnCapabilities.get(input).isDictionaryEncoded()) {
                    formatString = "dim_%s.drd";
                } else {
                    formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
                }
                return GuavaUtils.formatFunction(formatString).apply(input);
            }
        };
        final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
        if (segmentMetadata != null) {
            writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
            log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
            expectedFiles.add("metadata.drd");
        }
        Map<String, File> files = Maps.newLinkedHashMap();
        for (String fileName : expectedFiles) {
            files.put(fileName, new File(v8OutDir, fileName));
        }
        File smooshDir = new File(v8OutDir, "smoosher");
        FileUtils.forceMkdir(smooshDir);
        for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
            entry.getValue().delete();
        }
        for (File file : smooshDir.listFiles()) {
            Files.move(file, new File(v8OutDir, file.getName()));
        }
        if (!smooshDir.delete()) {
            log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
            throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
        }
        createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
        indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used: ArrayList (java.util.ArrayList), CompressedObjectStrategy (io.druid.segment.data.CompressedObjectStrategy), DateTime (org.joda.time.DateTime), ComplexMetricColumnSerializer (io.druid.segment.serde.ComplexMetricColumnSerializer), TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon), TreeSet (java.util.TreeSet), FileOutputStream (java.io.FileOutputStream), IntBuffer (java.nio.IntBuffer), File (java.io.File), Map (java.util.Map), Nullable (javax.annotation.Nullable), ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl), Interval (org.joda.time.Interval), ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), IOPeon (io.druid.segment.data.IOPeon), ColumnCapabilities (io.druid.segment.column.ColumnCapabilities), Function (com.google.common.base.Function), ByteSink (com.google.common.io.ByteSink), ISE (io.druid.java.util.common.ISE), Closer (com.google.common.io.Closer), ValueType (io.druid.segment.column.ValueType), FileChannel (java.nio.channels.FileChannel), IOException (java.io.IOException), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CompressionFactory (io.druid.segment.data.CompressionFactory), FileOutputSupplier (io.druid.common.guava.FileOutputSupplier), LongSupplierSerializer (io.druid.segment.data.LongSupplierSerializer)
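
For reference, a minimal self-contained sketch of the FileOutputStream-to-FileChannel pattern the method above uses for index.drd. The class name VersionedHeaderWriter and the payload format are illustrative only, not Druid's actual serialization; the point is that try-with-resources closes the channel and the underlying stream together.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;

public class VersionedHeaderWriter {
    // Writes a one-byte version marker followed by a UTF-8 payload,
    // mirroring the FileOutputStream -> FileChannel pattern above.
    public static void write(File file, byte version, String payload) throws IOException {
        try (FileOutputStream fos = new FileOutputStream(file);
             FileChannel channel = fos.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { version }));
            channel.write(ByteBuffer.wrap(payload.getBytes(StandardCharsets.UTF_8)));
        }
    }
}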

Example 27 with FileOutputStream

Use of java.io.FileOutputStream in project druid by druid-io.

From the class StringDimensionMergerLegacy, method writeIndexesToFiles:

@Override
public void writeIndexesToFiles(final ByteSink invertedIndexFile, final OutputSupplier<FileOutputStream> spatialIndexFile) throws IOException {
    final SerializerUtils serializerUtils = new SerializerUtils();
    final OutputSupplier<OutputStream> invertedIndexOutputSupplier = new OutputSupplier<OutputStream>() {

        @Override
        public OutputStream getOutput() throws IOException {
            return invertedIndexFile.openStream();
        }
    };
    bitmapWriter.close();
    serializerUtils.writeString(invertedIndexOutputSupplier, dimensionName);
    ByteStreams.copy(bitmapWriter.combineStreams(), invertedIndexOutputSupplier);
    if (capabilities.hasSpatialIndexes()) {
        spatialWriter.close();
        serializerUtils.writeString(spatialIndexFile, dimensionName);
        ByteStreams.copy(spatialWriter.combineStreams(), spatialIndexFile);
    }
}
Also used: FileOutputSupplier (io.druid.common.guava.FileOutputSupplier), OutputSupplier (com.google.common.io.OutputSupplier), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), SerializerUtils (io.druid.common.utils.SerializerUtils)
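
The anonymous class above adapts a Guava ByteSink to the legacy OutputSupplier interface so the older write path can open the stream lazily. A standalone sketch of the same adapter follows; note that OutputSupplier is deprecated in later Guava releases, so this only compiles against the older Guava that Druid used here, and the class name is illustrative.

import com.google.common.io.ByteSink;
import com.google.common.io.FileWriteMode;
import com.google.common.io.Files;
import com.google.common.io.OutputSupplier;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

public class ByteSinkSupplierAdapter {
    // Adapts a ByteSink to OutputSupplier<OutputStream>, mirroring the
    // anonymous class in writeIndexesToFiles above.
    static OutputSupplier<OutputStream> asSupplier(final ByteSink sink) {
        return new OutputSupplier<OutputStream>() {
            @Override
            public OutputStream getOutput() throws IOException {
                return sink.openStream();
            }
        };
    }

    public static void main(String[] args) throws IOException {
        ByteSink sink = Files.asByteSink(new File("inverted.drd"), FileWriteMode.APPEND);
        try (OutputStream out = asSupplier(sink).getOutput()) {
            out.write("example".getBytes(StandardCharsets.UTF_8));
        }
    }
}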

Example 28 with FileOutputStream

Use of java.io.FileOutputStream in project druid by druid-io.

From the class IndexMerger, method createIndexDrdFile:

public void createIndexDrdFile(byte versionId, File inDir, GenericIndexed<String> availableDimensions, GenericIndexed<String> availableMetrics, Interval dataInterval, BitmapSerdeFactory bitmapSerdeFactory) throws IOException {
    File indexFile = new File(inDir, "index.drd");
    try (FileChannel channel = new FileOutputStream(indexFile).getChannel()) {
        channel.write(ByteBuffer.wrap(new byte[] { versionId }));
        availableDimensions.writeToChannel(channel);
        availableMetrics.writeToChannel(channel);
        serializerUtils.writeString(channel, String.format("%s/%s", dataInterval.getStart(), dataInterval.getEnd()));
        serializerUtils.writeString(channel, mapper.writeValueAsString(bitmapSerdeFactory));
    }
    IndexIO.checkFileSize(indexFile);
}
Also used: FileChannel (java.nio.channels.FileChannel), FileOutputStream (java.io.FileOutputStream), File (java.io.File)
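
A note on resource handling: closing the FileChannel returned by FileOutputStream.getChannel() also closes the stream, so the single-resource try above does not leak. A slightly more defensive sketch declares both resources explicitly (the class and method names here are illustrative):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

public class VersionByteWriter {
    // Declaring stream and channel as separate resources guarantees the
    // stream is closed even if obtaining or using the channel fails early.
    public static void writeVersion(File indexFile, byte versionId) throws IOException {
        try (FileOutputStream fos = new FileOutputStream(indexFile);
             FileChannel channel = fos.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { versionId }));
        }
    }
}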

Example 29 with FileOutputStream

Use of java.io.FileOutputStream in project deeplearning4j by deeplearning4j.

From the class PatriciaTrieFormatter, method format:

/**
     * Format trie and write to file
     *
     * @param trie  trie to format
     * @param file  file to write to
     * @param formatBitString  true if the bits for this key should be included in the node
     * @throws FileNotFoundException if the file exists but is a directory rather than a regular file,
     * does not exist but cannot be created, or cannot be opened for any other reason
     */
public void format(PatriciaTrie<V> trie, File file, boolean formatBitString) throws FileNotFoundException {
    PrintWriter writer = new PrintWriter(new FileOutputStream(file));
    writer.println(format(trie, formatBitString));
    writer.close();
}
Also used: FileOutputStream (java.io.FileOutputStream), PrintWriter (java.io.PrintWriter)
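
Two caveats with the snippet above: the PrintWriter is never closed if an exception interrupts the write, and PrintWriter swallows IOExceptions rather than propagating them. A hedged variant (TrieFormatWriter is a hypothetical name, not part of deeplearning4j) addresses both with try-with-resources and checkError(), and pins the charset:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

public class TrieFormatWriter {
    // try-with-resources closes the writer on all paths; checkError()
    // surfaces write failures that PrintWriter would otherwise swallow.
    public static void writeFormatted(File file, String formatted) throws IOException {
        try (PrintWriter writer = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            writer.println(formatted);
            if (writer.checkError()) {
                throw new IOException("Failed to write " + file);
            }
        }
    }
}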

Example 30 with FileOutputStream

Use of java.io.FileOutputStream in project jetty.project by eclipse.

From the class PutFilter, method handlePut:

/* ------------------------------------------------------------------- */
public void handlePut(HttpServletRequest request, HttpServletResponse response, String pathInContext, File file) throws ServletException, IOException {
    boolean exists = file.exists();
    if (pathInContext.endsWith("/")) {
        if (!exists) {
            if (!file.mkdirs())
                response.sendError(HttpServletResponse.SC_FORBIDDEN);
            else {
                response.setStatus(HttpServletResponse.SC_CREATED);
                response.flushBuffer();
            }
        } else {
            response.setStatus(HttpServletResponse.SC_OK);
            response.flushBuffer();
        }
    } else {
        boolean ok = false;
        try {
            _hidden.put(pathInContext, pathInContext);
            File parent = file.getParentFile();
            parent.mkdirs();
            int toRead = request.getContentLength();
            InputStream in = request.getInputStream();
            if (_putAtomic) {
                File tmp = File.createTempFile(file.getName(), null, _tmpdir);
                try (OutputStream out = new FileOutputStream(tmp, false)) {
                    if (toRead >= 0)
                        IO.copy(in, out, toRead);
                    else
                        IO.copy(in, out);
                }
                if (!tmp.renameTo(file))
                    throw new IOException("rename from " + tmp + " to " + file + " failed");
            } else {
                try (OutputStream out = new FileOutputStream(file, false)) {
                    if (toRead >= 0)
                        IO.copy(in, out, toRead);
                    else
                        IO.copy(in, out);
                }
            }
            response.setStatus(exists ? HttpServletResponse.SC_OK : HttpServletResponse.SC_CREATED);
            response.flushBuffer();
            ok = true;
        } catch (Exception ex) {
            _context.log(ex.toString(), ex);
            response.sendError(HttpServletResponse.SC_FORBIDDEN);
        } finally {
            if (!ok) {
                try {
                    if (file.exists())
                        file.delete();
                } catch (Exception e) {
                    _context.log(e.toString(), e);
                }
            }
            _hidden.remove(pathInContext);
        }
    }
}
Also used: InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), IOException (java.io.IOException), File (java.io.File), ServletException (javax.servlet.ServletException), URISyntaxException (java.net.URISyntaxException), UnavailableException (javax.servlet.UnavailableException)
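
The _putAtomic branch above writes to a temp file and renames it into place so readers never observe a half-written upload. File.renameTo returns false on failure (and commonly fails across filesystems), which is why PutFilter checks it and throws. A sketch of the same idea using java.nio.file.Files.move, which throws descriptive exceptions instead of returning a flag; AtomicFileWriter is an illustrative name, and ATOMIC_MOVE may itself throw where the platform cannot rename atomically:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

public class AtomicFileWriter {
    // Copies the input to a temp file in the target's directory, then
    // moves it into place; ATOMIC_MOVE throws AtomicMoveNotSupportedException
    // rather than silently failing like File.renameTo can.
    public static void writeAtomically(File target, InputStream in) throws IOException {
        File tmp = File.createTempFile(target.getName(), null, target.getParentFile());
        try (OutputStream out = new FileOutputStream(tmp, false)) {
            byte[] buf = new byte[8192];
            int n;
            while ((n = in.read(buf)) != -1) {
                out.write(buf, 0, n);
            }
        }
        Files.move(tmp.toPath(), target.toPath(), StandardCopyOption.ATOMIC_MOVE);
    }
}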

Aggregations

FileOutputStream (java.io.FileOutputStream): 13792
File (java.io.File): 8295
IOException (java.io.IOException): 6166
FileInputStream (java.io.FileInputStream): 2644
OutputStream (java.io.OutputStream): 2605
InputStream (java.io.InputStream): 2077
BufferedOutputStream (java.io.BufferedOutputStream): 1755
FileNotFoundException (java.io.FileNotFoundException): 1531
OutputStreamWriter (java.io.OutputStreamWriter): 1440
Test (org.junit.Test): 1115
ZipEntry (java.util.zip.ZipEntry): 734
BufferedWriter (java.io.BufferedWriter): 668
ArrayList (java.util.ArrayList): 654
ZipOutputStream (java.util.zip.ZipOutputStream): 642
BufferedInputStream (java.io.BufferedInputStream): 604
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 556
PrintWriter (java.io.PrintWriter): 530
Properties (java.util.Properties): 497
URL (java.net.URL): 478
Writer (java.io.Writer): 477