Example 1 with ComplexMetricSerde

Use of io.druid.segment.serde.ComplexMetricSerde in project druid by druid-io.

The class StringArrayWritable, method fromBytes.

public static final InputRow fromBytes(byte[] data, AggregatorFactory[] aggs) {
    try {
        DataInput in = ByteStreams.newDataInput(data);
        // Read timestamp
        long timestamp = in.readLong();
        Map<String, Object> event = Maps.newHashMap();
        // Read dimensions
        List<String> dimensions = Lists.newArrayList();
        int dimNum = WritableUtils.readVInt(in);
        for (int i = 0; i < dimNum; i++) {
            String dimension = readString(in);
            dimensions.add(dimension);
            List<String> dimensionValues = readStringArray(in);
            if (dimensionValues == null) {
                continue;
            }
            if (dimensionValues.size() == 1) {
                event.put(dimension, dimensionValues.get(0));
            } else {
                event.put(dimension, dimensionValues);
            }
        }
        // Read metrics
        int metricSize = WritableUtils.readVInt(in);
        for (int i = 0; i < metricSize; i++) {
            String metric = readString(in);
            String type = getType(metric, aggs, i);
            if (type.equals("float")) {
                event.put(metric, in.readFloat());
            } else if (type.equals("long")) {
                event.put(metric, WritableUtils.readVLong(in));
            } else {
                ComplexMetricSerde serde = getComplexMetricSerde(type);
                byte[] value = readBytes(in);
                event.put(metric, serde.fromBytes(value, 0, value.length));
            }
        }
        return new MapBasedInputRow(timestamp, dimensions, event);
    } catch (IOException ex) {
        throw Throwables.propagate(ex);
    }
}
Also used: DataInput (java.io.DataInput), ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), IOException (java.io.IOException)
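
The method above leans on private helpers (readString, readStringArray, readBytes, getType, getComplexMetricSerde) that this excerpt omits. Below is a minimal sketch of what the read-side helpers could look like, assuming VInt length prefixes and UTF-8 string encoding; the framing actually used by the class is an assumption here.

// Sketch only; assumes org.apache.hadoop.io.WritableUtils, com.google.common.collect.Lists,
// and java.nio.charset.StandardCharsets are imported.
private static byte[] readBytes(DataInput in) throws IOException {
    // Assumed framing: a VInt length followed by that many payload bytes.
    byte[] result = new byte[WritableUtils.readVInt(in)];
    in.readFully(result);
    return result;
}

private static String readString(DataInput in) throws IOException {
    // Assumed encoding: strings are UTF-8 bytes framed like any other byte array.
    return new String(readBytes(in), StandardCharsets.UTF_8);
}

private static List<String> readStringArray(DataInput in) throws IOException {
    // Assumption: a count of zero stands in for a null value list, which
    // fromBytes then skips.
    int count = WritableUtils.readVInt(in);
    if (count == 0) {
        return null;
    }
    List<String> values = Lists.newArrayListWithCapacity(count);
    for (int i = 0; i < count; i++) {
        values.add(readString(in));
    }
    return values;
}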

Example 2 with ComplexMetricSerde

Use of io.druid.segment.serde.ComplexMetricSerde in project druid by druid-io.

The class StringArrayWritable, method toBytes.

public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) {
    try {
        ByteArrayDataOutput out = ByteStreams.newDataOutput();
        // Write timestamp
        out.writeLong(row.getTimestampFromEpoch());
        // Write all dimensions; guard against a null list before dereferencing it
        List<String> dimList = row.getDimensions();
        WritableUtils.writeVInt(out, dimList == null ? 0 : dimList.size());
        if (dimList != null) {
            for (String dim : dimList) {
                List<String> dimValues = row.getDimension(dim);
                writeString(dim, out);
                writeStringArray(dimValues, out);
            }
        }
        // Write all metrics
        Supplier<InputRow> supplier = new Supplier<InputRow>() {

            @Override
            public InputRow get() {
                return row;
            }
        };
        WritableUtils.writeVInt(out, aggs.length);
        for (AggregatorFactory aggFactory : aggs) {
            String k = aggFactory.getName();
            writeString(k, out);
            Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true));
            try {
                agg.aggregate();
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                if (reportParseExceptions) {
                    throw new ParseException(e, "Encountered parse error for aggregator[%s]", k);
                }
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
            }
            String t = aggFactory.getTypeName();
            if (t.equals("float")) {
                out.writeFloat(agg.getFloat());
            } else if (t.equals("long")) {
                WritableUtils.writeVLong(out, agg.getLong());
            } else {
                // It's a complex metric
                Object val = agg.get();
                ComplexMetricSerde serde = getComplexMetricSerde(t);
                writeBytes(serde.toBytes(val), out);
            }
        }
        return out.toByteArray();
    } catch (IOException ex) {
        throw Throwables.propagate(ex);
    }
}
Also used: ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), ByteArrayDataOutput (com.google.common.io.ByteArrayDataOutput), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), Aggregator (io.druid.query.aggregation.Aggregator), Supplier (com.google.common.base.Supplier), ParseException (io.druid.java.util.common.parsers.ParseException), IOException (java.io.IOException), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)
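
Paired with fromBytes from Example 1, this forms a symmetric serialize/deserialize cycle. A short usage sketch of the round trip, assuming both statics are in scope; the aggregator choice and row contents are invented for illustration.

// Illustrative round trip; LongSumAggregatorFactory and the row values are
// example choices, not taken from the excerpt above.
AggregatorFactory[] aggs = new AggregatorFactory[] {
    new LongSumAggregatorFactory("clicks", "clicks")
};
InputRow row = new MapBasedInputRow(
    System.currentTimeMillis(),
    ImmutableList.of("page"),
    ImmutableMap.<String, Object>of("page", "home", "clicks", 3L)
);
// Serialize, failing fast on parse errors, then read it back.
byte[] bytes = toBytes(row, aggs, true);
InputRow roundTripped = fromBytes(bytes, aggs);
// roundTripped keeps the timestamp and dimensions; "clicks" comes back as the
// value the aggregator produced during serialization.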

Example 3 with ComplexMetricSerde

Use of io.druid.segment.serde.ComplexMetricSerde in project druid by druid-io.

The class SegmentAnalyzer, method analyzeComplexColumn.

private ColumnAnalysis analyzeComplexColumn(@Nullable final ColumnCapabilities capabilities, @Nullable final Column column, final String typeName) {
    try (final ComplexColumn complexColumn = column != null ? column.getComplexColumn() : null) {
        final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues();
        long size = 0;
        if (analyzingSize() && complexColumn != null) {
            final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
            if (serde == null) {
                return ColumnAnalysis.error(String.format("unknown_complex_%s", typeName));
            }
            final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
            if (inputSizeFn == null) {
                return new ColumnAnalysis(typeName, hasMultipleValues, 0, null, null, null, null);
            }
            final int length = column.getLength();
            for (int i = 0; i < length; ++i) {
                size += inputSizeFn.apply(complexColumn.getRowValue(i));
            }
        }
        return new ColumnAnalysis(typeName, hasMultipleValues, size, null, null, null, null);
    }
}
Also used: ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), ComplexColumn (io.druid.segment.column.ComplexColumn)
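
The size pass above only runs when the registered serde exposes a non-null inputSizeFn; a null function short-circuits to a zero-size ColumnAnalysis. A hedged sketch of how a serde might implement it, assuming (purely for illustration) that its row values are raw byte arrays:

// Hypothetical inputSizeFn for a complex serde whose row values are byte[].
// Real serdes estimate size however suits their type; the cast is an assumption.
@Override
public Function<Object, Long> inputSizeFn() {
    return new Function<Object, Long>() {
        @Override
        public Long apply(Object input) {
            return input == null ? 0L : (long) ((byte[]) input).length;
        }
    };
}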

Example 4 with ComplexMetricSerde

Use of io.druid.segment.serde.ComplexMetricSerde in project druid by druid-io.

The class IndexMerger, method makeIndexFiles.

protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
    List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {

        @Nullable
        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
    final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
    for (IndexableAdapter adapter : indexes) {
        for (String dimension : adapter.getDimensionNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
            ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
        }
        for (String metric : adapter.getMetricNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
            ColumnCapabilities capabilities = adapter.getCapabilities(metric);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
            valueTypes.put(metric, capabilities.getType());
            metricTypeNames.put(metric, adapter.getMetricType(metric));
        }
    }
    for (String dimension : mergedDimensions) {
        dimCapabilities.add(columnCapabilities.get(dimension));
    }
    Closer closer = Closer.create();
    try {
        final Interval dataInterval;
        final File v8OutDir = new File(outDir, "v8-tmp");
        FileUtils.forceMkdir(v8OutDir);
        registerDeleteDirectory(closer, v8OutDir);
        File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
        closer.register(ioPeon);
        /*************  Main index.drd file **************/
        progress.progress();
        long startTime = System.currentTimeMillis();
        File indexFile = new File(v8OutDir, "index.drd");
        try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
            FileChannel channel = fileOutputStream.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
            GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
            DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
            for (IndexableAdapter index : indexes) {
                minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
                maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
            }
            dataInterval = new Interval(minTime, maxTime);
            serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
            serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
        }
        IndexIO.checkFileSize(indexFile);
        log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
            mergers.add(merger);
            merger.writeMergedValueMetadata(indexes);
            FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
            merger.writeValueMetadataToFile(dimOut);
            dimOuts.add(dimOut);
        }
        log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Walk through data sets and merge them *************/
        progress.progress();
        startTime = System.currentTimeMillis();
        Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
        timeWriter.open();
        ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
        final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
        final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
        for (String metric : mergedMetrics) {
            ValueType type = valueTypes.get(metric);
            switch(type) {
                case LONG:
                    metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
                    break;
                case FLOAT:
                    metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
                    break;
                case COMPLEX:
                    final String typeName = metricTypeNames.get(metric);
                    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
                    if (serde == null) {
                        throw new ISE("Unknown type[%s]", typeName);
                    }
                    metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
                    break;
                default:
                    throw new ISE("Unknown type[%s]", type);
            }
        }
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.open();
        }
        int rowCount = 0;
        long time = System.currentTimeMillis();
        List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
        for (IndexableAdapter index : indexes) {
            int[] arr = new int[index.getNumRows()];
            Arrays.fill(arr, INVALID_ROW);
            rowNumConversions.add(IntBuffer.wrap(arr));
        }
        for (Rowboat theRow : theRows) {
            progress.progress();
            timeWriter.add(theRow.getTimestamp());
            final Object[] metrics = theRow.getMetrics();
            for (int i = 0; i < metrics.length; ++i) {
                metWriters.get(i).serialize(metrics[i]);
            }
            Object[] dims = theRow.getDims();
            for (int i = 0; i < dims.length; ++i) {
                mergers.get(i).processMergedRow(dims[i]);
            }
            for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
                final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
                for (Integer rowNum : comprisedRow.getValue()) {
                    while (conversionBuffer.position() < rowNum) {
                        conversionBuffer.put(INVALID_ROW);
                    }
                    conversionBuffer.put(rowCount);
                }
            }
            if ((++rowCount % 500000) == 0) {
                log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
                time = System.currentTimeMillis();
            }
        }
        for (IntBuffer rowNumConversion : rowNumConversions) {
            rowNumConversion.rewind();
        }
        final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
        timeFile.delete();
        ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
        timeWriter.closeAndConsolidate(out);
        IndexIO.checkFileSize(timeFile);
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.close();
        }
        log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
        /************ Create Inverted Indexes and Finalize Columns *************/
        startTime = System.currentTimeMillis();
        final File invertedFile = new File(v8OutDir, "inverted.drd");
        Files.touch(invertedFile);
        out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
        final File geoFile = new File(v8OutDir, "spatial.drd");
        Files.touch(geoFile);
        OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
            legacyMerger.writeIndexes(rowNumConversions, closer);
            legacyMerger.writeIndexesToFiles(out, spatialOut);
            legacyMerger.writeRowValuesToFile(dimOuts.get(i));
        }
        log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        final Function<String, String> dimFilenameFunction = new Function<String, String>() {

            @Override
            public String apply(@Nullable String input) {
                String formatString;
                if (columnCapabilities.get(input).isDictionaryEncoded()) {
                    formatString = "dim_%s.drd";
                } else {
                    formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
                }
                return GuavaUtils.formatFunction(formatString).apply(input);
            }
        };
        final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
        if (segmentMetadata != null) {
            writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
            log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
            expectedFiles.add("metadata.drd");
        }
        Map<String, File> files = Maps.newLinkedHashMap();
        for (String fileName : expectedFiles) {
            files.put(fileName, new File(v8OutDir, fileName));
        }
        File smooshDir = new File(v8OutDir, "smoosher");
        FileUtils.forceMkdir(smooshDir);
        for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
            entry.getValue().delete();
        }
        for (File file : smooshDir.listFiles()) {
            Files.move(file, new File(v8OutDir, file.getName()));
        }
        if (!smooshDir.delete()) {
            log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
            throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
        }
        createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
        indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used: ArrayList (java.util.ArrayList), CompressedObjectStrategy (io.druid.segment.data.CompressedObjectStrategy), DateTime (org.joda.time.DateTime), ComplexMetricColumnSerializer (io.druid.segment.serde.ComplexMetricColumnSerializer), TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon), TreeSet (java.util.TreeSet), FileOutputStream (java.io.FileOutputStream), IntBuffer (java.nio.IntBuffer), File (java.io.File), Map (java.util.Map), Nullable (javax.annotation.Nullable), ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl), Interval (org.joda.time.Interval), ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), IOPeon (io.druid.segment.data.IOPeon), ColumnCapabilities (io.druid.segment.column.ColumnCapabilities), Function (com.google.common.base.Function), ByteSink (com.google.common.io.ByteSink), ISE (io.druid.java.util.common.ISE), Closer (com.google.common.io.Closer), ValueType (io.druid.segment.column.ValueType), FileChannel (java.nio.channels.FileChannel), IOException (java.io.IOException), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CompressionFactory (io.druid.segment.data.CompressionFactory), FileOutputSupplier (io.druid.common.guava.FileOutputSupplier), LongSupplierSerializer (io.druid.segment.data.LongSupplierSerializer)
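
The least obvious step in this method is the rowNumConversions bookkeeping: each source index gets an IntBuffer mapping its old row numbers to positions in the merged index, with dropped rows padded as INVALID_ROW. A standalone sketch of that remapping, with invented numbers:

// Standalone illustration of the remapping loop above; the 5-row source index
// and surviving rows {1, 3} are made up for the example.
final int INVALID_ROW = -1;
int[] arr = new int[5];
java.util.Arrays.fill(arr, INVALID_ROW);
java.nio.IntBuffer conversionBuffer = java.nio.IntBuffer.wrap(arr);

int mergedRowCount = 0;
for (int oldRowNum : new int[] { 1, 3 }) {
    // Pad positions for rows that did not survive the merge.
    while (conversionBuffer.position() < oldRowNum) {
        conversionBuffer.put(INVALID_ROW);
    }
    conversionBuffer.put(mergedRowCount++);
}
conversionBuffer.rewind();
// arr is now {-1, 0, -1, 1, -1}: old row number -> merged row number, or -1.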

Example 5 with ComplexMetricSerde

Use of io.druid.segment.serde.ComplexMetricSerde in project druid by druid-io.

The class MetricHolder, method fromByteBuffer.

public static MetricHolder fromByteBuffer(ByteBuffer buf, ObjectStrategy strategy, SmooshedFileMapper mapper) throws IOException {
    final byte ver = buf.get();
    if (version[0] != ver) {
        throw new ISE("Unknown version[%s] of MetricHolder", ver);
    }
    final String metricName = serializerUtils.readString(buf);
    final String typeName = serializerUtils.readString(buf);
    MetricHolder holder = new MetricHolder(metricName, typeName);
    switch(holder.type) {
        case LONG:
            holder.longType = CompressedLongsIndexedSupplier.fromByteBuffer(buf, ByteOrder.nativeOrder(), mapper);
            break;
        case FLOAT:
            holder.floatType = CompressedFloatsIndexedSupplier.fromByteBuffer(buf, ByteOrder.nativeOrder(), mapper);
            break;
        case COMPLEX:
            if (strategy != null) {
                holder.complexType = GenericIndexed.read(buf, strategy, mapper);
            } else {
                final ComplexMetricSerde serdeForType = ComplexMetrics.getSerdeForType(holder.getTypeName());
                if (serdeForType == null) {
                    throw new ISE("Unknown type[%s], cannot load.", holder.getTypeName());
                }
                holder.complexType = GenericIndexed.read(buf, serdeForType.getObjectStrategy());
            }
            break;
    }
    return holder;
}
Also used: ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde), ISE (io.druid.java.util.common.ISE)
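
For orientation, the layout fromByteBuffer consumes is: one version byte, then the metric name and type name as serialized strings, then the column payload that the switch dispatches on. A hedged sketch of decoding just that header, assuming the string framing is a 4-byte length followed by UTF-8 bytes (serializerUtils' exact format is not shown in this excerpt):

// Header-only decode; the int-length-plus-UTF-8 framing is an assumption.
static String readFramedString(java.nio.ByteBuffer buf) {
    byte[] bytes = new byte[buf.getInt()];
    buf.get(bytes);
    return new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
}

static void peekHeader(java.nio.ByteBuffer buf) {
    byte version = buf.get();                 // checked against version[0]
    String metricName = readFramedString(buf);
    String typeName = readFramedString(buf);  // drives the LONG/FLOAT/COMPLEX switch
    System.out.printf("version=%d metric=%s type=%s%n", version, metricName, typeName);
}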

Aggregations

ComplexMetricSerde (io.druid.segment.serde.ComplexMetricSerde): 7
ISE (io.druid.java.util.common.ISE): 4
IOException (java.io.IOException): 3
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 2
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 2
ValueType (io.druid.segment.column.ValueType): 2
CompressedObjectStrategy (io.druid.segment.data.CompressedObjectStrategy): 2
CompressionFactory (io.druid.segment.data.CompressionFactory): 2
Function (com.google.common.base.Function): 1
Supplier (com.google.common.base.Supplier): 1
ByteArrayDataOutput (com.google.common.io.ByteArrayDataOutput): 1
ByteSink (com.google.common.io.ByteSink): 1
Closer (com.google.common.io.Closer): 1
FileOutputSupplier (io.druid.common.guava.FileOutputSupplier): 1
InputRow (io.druid.data.input.InputRow): 1
ParseException (io.druid.java.util.common.parsers.ParseException): 1
Aggregator (io.druid.query.aggregation.Aggregator): 1
DimensionSpec (io.druid.query.dimension.DimensionSpec): 1
RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory): 1
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 1