Example 6 with ValueType

use of io.druid.segment.column.ValueType in project druid by druid-io.

the class BenchmarkColumnValueGenerator method generateSingleRowValue.

private Object generateSingleRowValue() {
    Object ret = null;
    ValueType type = schema.getType();
    if (distribution instanceof AbstractIntegerDistribution) {
        ret = ((AbstractIntegerDistribution) distribution).sample();
    } else if (distribution instanceof AbstractRealDistribution) {
        ret = ((AbstractRealDistribution) distribution).sample();
    } else if (distribution instanceof EnumeratedDistribution) {
        ret = ((EnumeratedDistribution) distribution).sample();
    }
    ret = convertType(ret, type);
    return ret;
}
Also used : AbstractRealDistribution(org.apache.commons.math3.distribution.AbstractRealDistribution) ValueType(io.druid.segment.column.ValueType) EnumeratedDistribution(org.apache.commons.math3.distribution.EnumeratedDistribution) AbstractIntegerDistribution(org.apache.commons.math3.distribution.AbstractIntegerDistribution)
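
The snippet samples a raw number from one of the commons-math3 distributions and then hands it to convertType, which is not shown here. Below is a minimal sketch of what such a coercion step could look like, covering only the STRING, LONG and FLOAT cases of ValueType; this helper is illustrative and is not the project's actual convertType.

// Illustrative sketch only: coerce a sampled value into the column's ValueType.
// Assumes io.druid.segment.column.ValueType is on the classpath; not the project's convertType.
private static Object convertTypeSketch(Object raw, ValueType type) {
    if (raw == null) {
        return null;
    }
    switch (type) {
        case STRING:
            return raw.toString();
        case LONG:
            return raw instanceof Number ? ((Number) raw).longValue() : Long.parseLong(raw.toString());
        case FLOAT:
            return raw instanceof Number ? ((Number) raw).floatValue() : Float.parseFloat(raw.toString());
        default:
            throw new IllegalArgumentException("unhandled type: " + type);
    }
}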

Example 7 with ValueType

use of io.druid.segment.column.ValueType in project druid by druid-io.

the class RowBasedGrouperHelper method makeValueConvertFunctions.

@SuppressWarnings("unchecked")
private static Function<Comparable, Comparable>[] makeValueConvertFunctions(final List<ValueType> valueTypes) {
    final Function<Comparable, Comparable>[] functions = new Function[valueTypes.size()];
    for (int i = 0; i < functions.length; i++) {
        ValueType type = valueTypes.get(i);
        // Subquery post-aggs aren't added to the rowSignature (see rowSignatureFor() in GroupByQueryHelper) because
        // their types aren't known, so default to String handling.
        type = type == null ? ValueType.STRING : type;
        switch(type) {
            case STRING:
                functions[i] = new Function<Comparable, Comparable>() {

                    @Override
                    public Comparable apply(@Nullable Comparable input) {
                        return input == null ? "" : input.toString();
                    }
                };
                break;
            case LONG:
                functions[i] = new Function<Comparable, Comparable>() {

                    @Override
                    public Comparable apply(@Nullable Comparable input) {
                        final Long val = DimensionHandlerUtils.convertObjectToLong(input);
                        return val == null ? 0L : val;
                    }
                };
                break;
            case FLOAT:
                functions[i] = new Function<Comparable, Comparable>() {

                    @Override
                    public Comparable apply(@Nullable Comparable input) {
                        final Float val = DimensionHandlerUtils.convertObjectToFloat(input);
                        return val == null ? 0.f : val;
                    }
                };
                break;
            default:
                throw new IAE("invalid type: [%s]", type);
        }
    }
    return functions;
}
Also used : Function(com.google.common.base.Function) ValueType(io.druid.segment.column.ValueType) IAE(io.druid.java.util.common.IAE)
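
A hedged usage sketch of the converters above, as they might be exercised from inside the same class (the input values below are made up for illustration): each grouping value is pushed through the function at its dimension's index, so nulls collapse to the per-type defaults ("" for STRING, 0L for LONG, 0.f for FLOAT), and a null type falls back to STRING handling.

// Hypothetical driver, not project code: apply the per-dimension converters to one row of values.
List<ValueType> valueTypes = Arrays.asList(ValueType.STRING, ValueType.LONG, null);
Function<Comparable, Comparable>[] fns = makeValueConvertFunctions(valueTypes);
Comparable[] row = new Comparable[] { null, "42", 3.5d };
for (int i = 0; i < row.length; i++) {
    // Results: "" (null under STRING), 42L (string parsed to long), "3.5" (null type treated as STRING).
    row[i] = fns[i].apply(row[i]);
}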

Example 8 with ValueType

use of io.druid.segment.column.ValueType in project druid by druid-io.

the class RowBasedGrouperHelper method makeValueConvertFunctions.

@SuppressWarnings("unchecked")
private static Function<Comparable, Comparable>[] makeValueConvertFunctions(final Map<String, ValueType> rawInputRowSignature, final List<DimensionSpec> dimensions) {
    final List<ValueType> valueTypes = Lists.newArrayListWithCapacity(dimensions.size());
    for (DimensionSpec dimensionSpec : dimensions) {
        final ValueType valueType = rawInputRowSignature.get(dimensionSpec.getDimension());
        valueTypes.add(valueType == null ? ValueType.STRING : valueType);
    }
    return makeValueConvertFunctions(valueTypes);
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) ValueType(io.druid.segment.column.ValueType)
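
A hedged sketch of how this overload might be fed (the row signature, column names, and use of DefaultDimensionSpec below are made-up illustration, not taken from the project): the signature is keyed by input column name, and any dimension missing from it falls back to STRING.

// Hypothetical inputs, for illustration only.
Map<String, ValueType> rawInputRowSignature = ImmutableMap.of(
    "country", ValueType.STRING,
    "added", ValueType.LONG
);
List<DimensionSpec> dimensions = Arrays.asList(
    new DefaultDimensionSpec("country", "country"),
    new DefaultDimensionSpec("added", "added"),
    // Not present in the signature, so it is treated as STRING.
    new DefaultDimensionSpec("unknownColumn", "unknownColumn")
);
Function<Comparable, Comparable>[] fns = makeValueConvertFunctions(rawInputRowSignature, dimensions);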

Example 9 with ValueType

use of io.druid.segment.column.ValueType in project druid by druid-io.

the class IndexIO method validateRowValues.

public static void validateRowValues(Map<String, DimensionHandler> dimHandlers, Rowboat rb1, IndexableAdapter adapter1, Rowboat rb2, IndexableAdapter adapter2) {
    if (rb1.getTimestamp() != rb2.getTimestamp()) {
        throw new SegmentValidationException("Timestamp mismatch. Expected %d found %d", rb1.getTimestamp(), rb2.getTimestamp());
    }
    final Object[] dims1 = rb1.getDims();
    final Object[] dims2 = rb2.getDims();
    if (dims1.length != dims2.length) {
        throw new SegmentValidationException("Dim lengths not equal %s vs %s", Arrays.deepToString(dims1), Arrays.deepToString(dims2));
    }
    final Indexed<String> dim1Names = adapter1.getDimensionNames();
    final Indexed<String> dim2Names = adapter2.getDimensionNames();
    for (int i = 0; i < dims1.length; ++i) {
        final Object dim1Vals = dims1[i];
        final Object dim2Vals = dims2[i];
        final String dim1Name = dim1Names.get(i);
        final String dim2Name = dim2Names.get(i);
        ColumnCapabilities capabilities1 = adapter1.getCapabilities(dim1Name);
        ColumnCapabilities capabilities2 = adapter2.getCapabilities(dim2Name);
        ValueType dim1Type = capabilities1.getType();
        ValueType dim2Type = capabilities2.getType();
        if (dim1Type != dim2Type) {
            throw new SegmentValidationException("Dim [%s] types not equal. Expected %d found %d", dim1Name, dim1Type, dim2Type);
        }
        DimensionHandler dimHandler = dimHandlers.get(dim1Name);
        dimHandler.validateSortedEncodedKeyComponents(dim1Vals, dim2Vals, adapter1.getDimValueLookup(dim1Name), adapter2.getDimValueLookup(dim2Name));
    }
}
Also used : ValueType(io.druid.segment.column.ValueType) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities)
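
A hedged sketch of how this per-row check might be driven when comparing two segments (the lockstep loop and the getRows() iteration below are illustrative; this is not the project's validation entry point):

// Hypothetical driver: walk two adapters in lockstep and validate each row pair.
Iterator<Rowboat> rows1 = adapter1.getRows().iterator();
Iterator<Rowboat> rows2 = adapter2.getRows().iterator();
while (rows1.hasNext() && rows2.hasNext()) {
    IndexIO.validateRowValues(dimHandlers, rows1.next(), adapter1, rows2.next(), adapter2);
}
if (rows1.hasNext() || rows2.hasNext()) {
    throw new SegmentValidationException("Row count mismatch between the two segments");
}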

Example 10 with ValueType

use of io.druid.segment.column.ValueType in project druid by druid-io.

the class IndexMerger method makeIndexFiles.

protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
    List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {

        @Nullable
        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
    final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
    for (IndexableAdapter adapter : indexes) {
        for (String dimension : adapter.getDimensionNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
            ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
        }
        for (String metric : adapter.getMetricNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
            ColumnCapabilities capabilities = adapter.getCapabilities(metric);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
            valueTypes.put(metric, capabilities.getType());
            metricTypeNames.put(metric, adapter.getMetricType(metric));
        }
    }
    for (String dimension : mergedDimensions) {
        dimCapabilities.add(columnCapabilities.get(dimension));
    }
    Closer closer = Closer.create();
    try {
        final Interval dataInterval;
        final File v8OutDir = new File(outDir, "v8-tmp");
        FileUtils.forceMkdir(v8OutDir);
        registerDeleteDirectory(closer, v8OutDir);
        File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
        closer.register(ioPeon);
        /*************  Main index.drd file **************/
        progress.progress();
        long startTime = System.currentTimeMillis();
        File indexFile = new File(v8OutDir, "index.drd");
        try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
            FileChannel channel = fileOutputStream.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
            GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
            DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
            for (IndexableAdapter index : indexes) {
                minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
                maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
            }
            dataInterval = new Interval(minTime, maxTime);
            serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
            serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
        }
        IndexIO.checkFileSize(indexFile);
        log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
            mergers.add(merger);
            merger.writeMergedValueMetadata(indexes);
            FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
            merger.writeValueMetadataToFile(dimOut);
            dimOuts.add(dimOut);
        }
        log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Walk through data sets and merge them *************/
        progress.progress();
        startTime = System.currentTimeMillis();
        Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
        timeWriter.open();
        ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
        final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
        final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
        for (String metric : mergedMetrics) {
            ValueType type = valueTypes.get(metric);
            switch(type) {
                case LONG:
                    metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
                    break;
                case FLOAT:
                    metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
                    break;
                case COMPLEX:
                    final String typeName = metricTypeNames.get(metric);
                    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
                    if (serde == null) {
                        throw new ISE("Unknown type[%s]", typeName);
                    }
                    metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
                    break;
                default:
                    throw new ISE("Unknown type[%s]", type);
            }
        }
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.open();
        }
        int rowCount = 0;
        long time = System.currentTimeMillis();
        List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
        for (IndexableAdapter index : indexes) {
            int[] arr = new int[index.getNumRows()];
            Arrays.fill(arr, INVALID_ROW);
            rowNumConversions.add(IntBuffer.wrap(arr));
        }
        for (Rowboat theRow : theRows) {
            progress.progress();
            timeWriter.add(theRow.getTimestamp());
            final Object[] metrics = theRow.getMetrics();
            for (int i = 0; i < metrics.length; ++i) {
                metWriters.get(i).serialize(metrics[i]);
            }
            Object[] dims = theRow.getDims();
            for (int i = 0; i < dims.length; ++i) {
                mergers.get(i).processMergedRow(dims[i]);
            }
            for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
                final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
                for (Integer rowNum : comprisedRow.getValue()) {
                    while (conversionBuffer.position() < rowNum) {
                        conversionBuffer.put(INVALID_ROW);
                    }
                    conversionBuffer.put(rowCount);
                }
            }
            if ((++rowCount % 500000) == 0) {
                log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
                time = System.currentTimeMillis();
            }
        }
        for (IntBuffer rowNumConversion : rowNumConversions) {
            rowNumConversion.rewind();
        }
        final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
        timeFile.delete();
        ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
        timeWriter.closeAndConsolidate(out);
        IndexIO.checkFileSize(timeFile);
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.close();
        }
        log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
        /************ Create Inverted Indexes and Finalize Columns *************/
        startTime = System.currentTimeMillis();
        final File invertedFile = new File(v8OutDir, "inverted.drd");
        Files.touch(invertedFile);
        out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
        final File geoFile = new File(v8OutDir, "spatial.drd");
        Files.touch(geoFile);
        OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
            legacyMerger.writeIndexes(rowNumConversions, closer);
            legacyMerger.writeIndexesToFiles(out, spatialOut);
            legacyMerger.writeRowValuesToFile(dimOuts.get(i));
        }
        log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        final Function<String, String> dimFilenameFunction = new Function<String, String>() {

            @Override
            public String apply(@Nullable String input) {
                String formatString;
                if (columnCapabilities.get(input).isDictionaryEncoded()) {
                    formatString = "dim_%s.drd";
                } else {
                    formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
                }
                return GuavaUtils.formatFunction(formatString).apply(input);
            }
        };
        final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
        if (segmentMetadata != null) {
            writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
            log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
            expectedFiles.add("metadata.drd");
        }
        Map<String, File> files = Maps.newLinkedHashMap();
        for (String fileName : expectedFiles) {
            files.put(fileName, new File(v8OutDir, fileName));
        }
        File smooshDir = new File(v8OutDir, "smoosher");
        FileUtils.forceMkdir(smooshDir);
        for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
            entry.getValue().delete();
        }
        for (File file : smooshDir.listFiles()) {
            Files.move(file, new File(v8OutDir, file.getName()));
        }
        if (!smooshDir.delete()) {
            log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
            throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
        }
        createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
        indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) CompressedObjectStrategy(io.druid.segment.data.CompressedObjectStrategy) DateTime(org.joda.time.DateTime) ComplexMetricColumnSerializer(io.druid.segment.serde.ComplexMetricColumnSerializer) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) TreeSet(java.util.TreeSet) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File) Map(java.util.Map) Nullable(javax.annotation.Nullable) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) Interval(org.joda.time.Interval) ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities) Function(com.google.common.base.Function) ByteSink(com.google.common.io.ByteSink) ISE(io.druid.java.util.common.ISE) ComplexMetricColumnSerializer(io.druid.segment.serde.ComplexMetricColumnSerializer) Closer(com.google.common.io.Closer) ValueType(io.druid.segment.column.ValueType) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CompressionFactory(io.druid.segment.data.CompressionFactory) FileOutputSupplier(io.druid.common.guava.FileOutputSupplier) LongSupplierSerializer(io.druid.segment.data.LongSupplierSerializer)
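
The least obvious piece of this method is the rowNumConversions bookkeeping: each source index gets an IntBuffer sized to its row count, pre-filled with INVALID_ROW, and as each merged row is written the buffer positions of its contributing source rows are overwritten with the new merged row number. Below is a standalone illustration of that remapping; the values and the local INVALID_ROW constant are made up for the example.

// Standalone illustration of the row-number remapping, not project code.
final int INVALID_ROW = -1;
int[] arr = new int[5];                       // a source index with 5 rows
Arrays.fill(arr, INVALID_ROW);
IntBuffer conversionBuffer = IntBuffer.wrap(arr);

// Suppose merged row 0 was built from source rows 1 and 3 of this index.
int mergedRowNum = 0;
for (int sourceRow : new int[] { 1, 3 }) {
    while (conversionBuffer.position() < sourceRow) {
        conversionBuffer.put(INVALID_ROW);    // rows that did not contribute stay invalid
    }
    conversionBuffer.put(mergedRowNum);
}
// arr is now { -1, 0, -1, 0, -1 }: source rows 1 and 3 map to merged row 0.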

Aggregations

ValueType (io.druid.segment.column.ValueType): 24 usages
ISE (io.druid.java.util.common.ISE): 8 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 5 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 4 usages
ColumnCapabilities (io.druid.segment.column.ColumnCapabilities): 4 usages
Closer (com.google.common.io.Closer): 3 usages
Column (io.druid.segment.column.Column): 3 usages
ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl): 3 usages
ComplexColumn (io.druid.segment.column.ComplexColumn): 3 usages
Function (com.google.common.base.Function): 2 usages
Supplier (com.google.common.base.Supplier): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
Row (io.druid.data.input.Row): 2 usages
IAE (io.druid.java.util.common.IAE): 2 usages
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 2 usages
ColumnDescriptor (io.druid.segment.column.ColumnDescriptor): 2 usages
DictionaryEncodedColumn (io.druid.segment.column.DictionaryEncodedColumn): 2 usages
GenericColumn (io.druid.segment.column.GenericColumn): 2 usages
IndexedFloatsGenericColumn (io.druid.segment.column.IndexedFloatsGenericColumn): 2 usages
IndexedLongsGenericColumn (io.druid.segment.column.IndexedLongsGenericColumn): 2 usages