Search in sources :

Example 1 with DimensionIndexer

use of io.druid.segment.DimensionIndexer in project druid by druid-io.

the class IncrementalIndexStorageAdapter method getDimensionCardinality.

/**
 * Returns the cardinality reported by the indexer of the given dimension.
 *
 * @param dimension name of the column to inspect
 * @return {@code Integer.MAX_VALUE} for the time column, {@code 0} when the index has no
 *         such dimension, otherwise the value of {@code DimensionIndexer.getCardinality()}
 */
@Override
public int getDimensionCardinality(String dimension) {
    if (dimension.equals(Column.TIME_COLUMN_NAME)) {
        return Integer.MAX_VALUE;
    }
    IncrementalIndex.DimensionDesc desc = index.getDimension(dimension);
    if (desc == null) {
        return 0;
    }
    // Reuse the descriptor that was just null-checked rather than looking the dimension up a
    // second time; a repeated lookup is redundant and its result would not be null-checked.
    DimensionIndexer indexer = desc.getIndexer();
    return indexer.getCardinality();
}
Also used : DimensionIndexer(io.druid.segment.DimensionIndexer)

Example 2 with DimensionIndexer

use of io.druid.segment.DimensionIndexer in project druid by druid-io.

the class IncrementalIndexStorageAdapter method getMinValue.

/**
 * Looks up the minimum value tracked by the indexer of the given column.
 *
 * @param column name of the dimension to inspect
 * @return the indexer's minimum value, or {@code null} when the index has no such dimension
 */
@Override
public Comparable getMinValue(String column) {
    final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column);
    return dimensionDesc == null ? null : dimensionDesc.getIndexer().getMinValue();
}
Also used : DimensionIndexer(io.druid.segment.DimensionIndexer)

Example 3 with DimensionIndexer

use of io.druid.segment.DimensionIndexer in project druid by druid-io.

the class IncrementalIndexStorageAdapter method makeCursors.

/**
 * Builds a sequence of cursors over the incremental index, one cursor per granularity bucket
 * of the requested interval.
 *
 * The requested interval is first clipped to the interval covered by the data (from
 * {@code getMinTime()} to the granularity bucket end of {@code getMaxTime()}). An empty
 * sequence is returned when the index is empty or the two intervals do not overlap.
 *
 * @param filter         filter handed to {@code makeFilterMatcher} for every cursor
 * @param interval       requested time interval
 * @param virtualColumns virtual columns consulted before the index for every selector
 * @param gran           granularity used to split the interval into buckets
 * @param descending     when true the buckets are visited in reverse order and the facts are
 *                       requested in descending order
 * @return a sequence that maps each bucket interval to a cursor
 */
@Override
public Sequence<Cursor> makeCursors(final Filter filter, final Interval interval, final VirtualColumns virtualColumns, final Granularity gran, final boolean descending) {
    if (index.isEmpty()) {
        return Sequences.empty();
    }
    Interval actualIntervalTmp = interval;
    // Interval actually covered by the data, extended to the end of the last bucket.
    final Interval dataInterval = new Interval(getMinTime().getMillis(), gran.bucketEnd(getMaxTime()).getMillis());
    if (!actualIntervalTmp.overlaps(dataInterval)) {
        return Sequences.empty();
    }
    // Clip the requested interval so that it never extends past the data interval.
    if (actualIntervalTmp.getStart().isBefore(dataInterval.getStart())) {
        actualIntervalTmp = actualIntervalTmp.withStart(dataInterval.getStart());
    }
    if (actualIntervalTmp.getEnd().isAfter(dataInterval.getEnd())) {
        actualIntervalTmp = actualIntervalTmp.withEnd(dataInterval.getEnd());
    }
    final Interval actualInterval = actualIntervalTmp;
    Iterable<Interval> iterable = gran.getIterable(actualInterval);
    if (descending) {
        // Materialize the buckets so they can be walked from last to first.
        iterable = Lists.reverse(ImmutableList.copyOf(iterable));
    }
    return Sequences.map(Sequences.simple(iterable), new Function<Interval, Cursor>() {

        // Holder for the current fact entry. It is a field of this Function, so every cursor
        // created by apply() reads and writes the same holder.
        EntryHolder currEntry = new EntryHolder();

        @Override
        public Cursor apply(@Nullable final Interval interval) {
            // Never start before the clipped query interval, even if the bucket does.
            final long timeStart = Math.max(interval.getStartMillis(), actualInterval.getStartMillis());
            return new Cursor() {

                private final ValueMatcher filterMatcher = makeFilterMatcher(filter, this);

                // Iterator currently being consumed; replaced on every reset().
                private Iterator<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> baseIter;

                // Facts of this bucket; a fresh iterator is taken from it on every reset().
                private Iterable<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> cursorIterable;

                // True when the bucket held no facts at construction time.
                private boolean emptyRange;

                final DateTime time;

                // -1 until the first reset(); afterwards the number of entries reset() skips
                // before looking for a match. Only reset() modifies it.
                int numAdvanced = -1;

                boolean done;

                {
                    // The range ends at the next bucket boundary or the end of the clipped
                    // query interval, whichever comes first.
                    cursorIterable = index.getFacts().timeRangeIterable(descending, timeStart, Math.min(actualInterval.getEndMillis(), gran.increment(interval.getStart()).getMillis()));
                    emptyRange = !cursorIterable.iterator().hasNext();
                    time = gran.toDateTime(interval.getStartMillis());
                    // Position the cursor on the first matching entry, if there is one.
                    reset();
                }

                @Override
                public DateTime getTime() {
                    return time;
                }

                /**
                 * Moves to the next entry accepted by the filter, calling
                 * BaseQuery.checkInterrupted() before each step. Marks the cursor done when
                 * the iterator is exhausted.
                 */
                @Override
                public void advance() {
                    if (!baseIter.hasNext()) {
                        done = true;
                        return;
                    }
                    while (baseIter.hasNext()) {
                        BaseQuery.checkInterrupted();
                        currEntry.set(baseIter.next());
                        if (filterMatcher.matches()) {
                            return;
                        }
                    }
                    // Reached only when the loop ran out of entries without returning; the
                    // matcher is evaluated again against the last entry that was set.
                    if (!filterMatcher.matches()) {
                        done = true;
                    }
                }

                /**
                 * Same as advance(), except that an interrupted thread makes the method return
                 * early (without touching {@code done}) instead of calling
                 * BaseQuery.checkInterrupted().
                 */
                @Override
                public void advanceUninterruptibly() {
                    if (!baseIter.hasNext()) {
                        done = true;
                        return;
                    }
                    while (baseIter.hasNext()) {
                        if (Thread.currentThread().isInterrupted()) {
                            return;
                        }
                        currEntry.set(baseIter.next());
                        if (filterMatcher.matches()) {
                            return;
                        }
                    }
                    // Reached only when the loop ran out of entries without returning.
                    if (!filterMatcher.matches()) {
                        done = true;
                    }
                }

                /** Calls advance() up to {@code offset} times, stopping early once done. */
                @Override
                public void advanceTo(int offset) {
                    int count = 0;
                    while (count < offset && !isDone()) {
                        advance();
                        count++;
                    }
                }

                @Override
                public boolean isDone() {
                    return done;
                }

                @Override
                public boolean isDoneOrInterrupted() {
                    return isDone() || Thread.currentThread().isInterrupted();
                }

                /**
                 * Restarts iteration from a fresh iterator and positions the cursor on the
                 * first entry accepted by the filter.
                 */
                @Override
                public void reset() {
                    baseIter = cursorIterable.iterator();
                    if (numAdvanced == -1) {
                        // First call (from the instance initializer): nothing to skip yet.
                        numAdvanced = 0;
                    } else {
                        // Skip the entries that an earlier reset() already found non-matching.
                        Iterators.advance(baseIter, numAdvanced);
                    }
                    BaseQuery.checkInterrupted();
                    boolean foundMatched = false;
                    while (baseIter.hasNext()) {
                        currEntry.set(baseIter.next());
                        if (filterMatcher.matches()) {
                            foundMatched = true;
                            break;
                        }
                        numAdvanced++;
                    }
                    done = !foundMatched && (emptyRange || !baseIter.hasNext());
                }

                /**
                 * Returns the virtual column's selector when the dimension is a virtual
                 * column, otherwise the undecorated selector passed through
                 * {@code dimensionSpec.decorate}.
                 */
                @Override
                public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
                    if (virtualColumns.exists(dimensionSpec.getDimension())) {
                        return virtualColumns.makeDimensionSelector(dimensionSpec, this);
                    }
                    return dimensionSpec.decorate(makeDimensionSelectorUndecorated(dimensionSpec));
                }

                // Builds the selector for the time column, a real dimension, or a long/float
                // non-dimension column; anything else yields the null selector.
                private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
                    final String dimension = dimensionSpec.getDimension();
                    final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();
                    if (dimension.equals(Column.TIME_COLUMN_NAME)) {
                        DimensionSelector selector = new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn, descending);
                        return selector;
                    }
                    final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension());
                    if (dimensionDesc == null) {
                        // not a dimension, column may be a metric
                        ColumnCapabilities capabilities = getColumnCapabilities(dimension);
                        if (capabilities == null) {
                            return NullDimensionSelector.instance();
                        }
                        if (capabilities.getType() == ValueType.LONG) {
                            return new LongWrappingDimensionSelector(makeLongColumnSelector(dimension), extractionFn);
                        }
                        if (capabilities.getType() == ValueType.FLOAT) {
                            return new FloatWrappingDimensionSelector(makeFloatColumnSelector(dimension), extractionFn);
                        }
                        // if we can't wrap the base column, just return a column of all nulls
                        return NullDimensionSelector.instance();
                    } else {
                        final DimensionIndexer indexer = dimensionDesc.getIndexer();
                        return indexer.makeDimensionSelector(dimensionSpec, currEntry, dimensionDesc);
                    }
                }

                /**
                 * Resolution order: virtual column, then dimension (via its indexer), then
                 * metric. An unknown column yields ZeroFloatColumnSelector.
                 */
                @Override
                public FloatColumnSelector makeFloatColumnSelector(String columnName) {
                    if (virtualColumns.exists(columnName)) {
                        return virtualColumns.makeFloatColumnSelector(columnName, this);
                    }
                    final Integer dimIndex = index.getDimensionIndex(columnName);
                    if (dimIndex != null) {
                        final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName);
                        final DimensionIndexer indexer = dimensionDesc.getIndexer();
                        return indexer.makeFloatColumnSelector(currEntry, dimensionDesc);
                    }
                    final Integer metricIndexInt = index.getMetricIndex(columnName);
                    if (metricIndexInt == null) {
                        return ZeroFloatColumnSelector.instance();
                    }
                    // Unbox once so that get() does not unbox on every call.
                    final int metricIndex = metricIndexInt;
                    return new FloatColumnSelector() {

                        @Override
                        public float get() {
                            return index.getMetricFloatValue(currEntry.getValue(), metricIndex);
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                            inspector.visit("index", index);
                        }
                    };
                }

                /**
                 * Resolution order: virtual column, then the time column, then dimension (via
                 * its indexer), then metric. An unknown column yields ZeroLongColumnSelector.
                 */
                @Override
                public LongColumnSelector makeLongColumnSelector(String columnName) {
                    if (virtualColumns.exists(columnName)) {
                        return virtualColumns.makeLongColumnSelector(columnName, this);
                    }
                    if (columnName.equals(Column.TIME_COLUMN_NAME)) {
                        // Reads the timestamp of the current entry's key.
                        class TimeLongColumnSelector implements LongColumnSelector {

                            @Override
                            public long get() {
                                return currEntry.getKey().getTimestamp();
                            }

                            @Override
                            public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                            }
                        }
                        return new TimeLongColumnSelector();
                    }
                    final Integer dimIndex = index.getDimensionIndex(columnName);
                    if (dimIndex != null) {
                        final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName);
                        final DimensionIndexer indexer = dimensionDesc.getIndexer();
                        return indexer.makeLongColumnSelector(currEntry, dimensionDesc);
                    }
                    final Integer metricIndexInt = index.getMetricIndex(columnName);
                    if (metricIndexInt == null) {
                        return ZeroLongColumnSelector.instance();
                    }
                    // Unbox once so that get() does not unbox on every call.
                    final int metricIndex = metricIndexInt;
                    return new LongColumnSelector() {

                        @Override
                        public long get() {
                            return index.getMetricLongValue(currEntry.getValue(), metricIndex);
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                            inspector.visit("index", index);
                        }
                    };
                }

                /**
                 * Resolution order: virtual column, then the time column, then metric, then
                 * dimension. Returns {@code null} when the column is none of these.
                 */
                @Override
                public ObjectColumnSelector makeObjectColumnSelector(String column) {
                    if (virtualColumns.exists(column)) {
                        return virtualColumns.makeObjectColumnSelector(column, this);
                    }
                    if (column.equals(Column.TIME_COLUMN_NAME)) {
                        return new ObjectColumnSelector<Long>() {

                            @Override
                            public Class classOfObject() {
                                return Long.TYPE;
                            }

                            @Override
                            public Long get() {
                                return currEntry.getKey().getTimestamp();
                            }
                        };
                    }
                    final Integer metricIndexInt = index.getMetricIndex(column);
                    if (metricIndexInt != null) {
                        final int metricIndex = metricIndexInt;
                        final Class classOfObject = index.getMetricClass(column);
                        return new ObjectColumnSelector() {

                            @Override
                            public Class classOfObject() {
                                return classOfObject;
                            }

                            @Override
                            public Object get() {
                                return index.getMetricObjectValue(currEntry.getValue(), metricIndex);
                            }
                        };
                    }
                    IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column);
                    if (dimensionDesc == null) {
                        return null;
                    } else {
                        final int dimensionIndex = dimensionDesc.getIndex();
                        final DimensionIndexer indexer = dimensionDesc.getIndexer();
                        return new ObjectColumnSelector<Object>() {

                            @Override
                            public Class classOfObject() {
                                return Object.class;
                            }

                            @Override
                            public Object get() {
                                IncrementalIndex.TimeAndDims key = currEntry.getKey();
                                if (key == null) {
                                    return null;
                                }
                                Object[] dims = key.getDims();
                                // The key's dims array may be shorter than this dimension's
                                // position; in that case there is no value to return.
                                if (dimensionIndex >= dims.length) {
                                    return null;
                                }
                                return indexer.convertUnsortedEncodedKeyComponentToActualArrayOrList(dims[dimensionIndex], DimensionIndexer.ARRAY);
                            }
                        };
                    }
                }

                /**
                 * Returns the capabilities of a virtual column when one exists under that
                 * name, otherwise whatever the index reports for the column.
                 */
                @Nullable
                @Override
                public ColumnCapabilities getColumnCapabilities(String columnName) {
                    if (virtualColumns.exists(columnName)) {
                        return virtualColumns.getColumnCapabilities(columnName);
                    }
                    return index.getCapabilities(columnName);
                }
            };
        }
    });
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) SingleScanTimeDimSelector(io.druid.segment.SingleScanTimeDimSelector) RuntimeShapeInspector(io.druid.query.monomorphicprocessing.RuntimeShapeInspector) Cursor(io.druid.segment.Cursor) DateTime(org.joda.time.DateTime) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities) DimensionIndexer(io.druid.segment.DimensionIndexer) ZeroLongColumnSelector(io.druid.segment.ZeroLongColumnSelector) LongColumnSelector(io.druid.segment.LongColumnSelector) ObjectColumnSelector(io.druid.segment.ObjectColumnSelector) LongWrappingDimensionSelector(io.druid.segment.LongWrappingDimensionSelector) NullDimensionSelector(io.druid.segment.NullDimensionSelector) DimensionSelector(io.druid.segment.DimensionSelector) FloatWrappingDimensionSelector(io.druid.segment.FloatWrappingDimensionSelector) BooleanValueMatcher(io.druid.segment.filter.BooleanValueMatcher) ValueMatcher(io.druid.query.filter.ValueMatcher) ZeroFloatColumnSelector(io.druid.segment.ZeroFloatColumnSelector) FloatColumnSelector(io.druid.segment.FloatColumnSelector) LongWrappingDimensionSelector(io.druid.segment.LongWrappingDimensionSelector) FloatWrappingDimensionSelector(io.druid.segment.FloatWrappingDimensionSelector) ExtractionFn(io.druid.query.extraction.ExtractionFn) Nullable(javax.annotation.Nullable) Interval(org.joda.time.Interval)

Example 4 with DimensionIndexer

use of io.druid.segment.DimensionIndexer in project druid by druid-io.

the class IncrementalIndex method toTimeAndDims.

/**
 * Converts an input row into the {@code TimeAndDims} key used by this index.
 *
 * The row is first passed through {@code formatRow}. Each of its dimensions is then encoded
 * by the dimension's indexer. Dimensions not yet known to the index are registered on the
 * fly (as STRING columns when no capabilities exist for them) and their encoded values are
 * appended after the values of the previously known dimensions.
 *
 * @param row the row to convert
 * @return key holding the row timestamp truncated to the start of its granularity bucket
 *         (never below {@code minTimestamp}) and the encoded dimension values
 * @throws IAE if the row timestamp is below {@code minTimestamp}
 * @throws ISE if the same dimension occurs more than once in the row
 * @throws IndexSizeExceededException declared by the signature; not thrown directly here
 */
@VisibleForTesting
TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException {
    row = formatRow(row);
    if (row.getTimestampFromEpoch() < minTimestamp) {
        throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, new DateTime(minTimestamp));
    }
    final List<String> rowDimensions = row.getDimensions();
    Object[] dims;
    // Encoded values of dimensions that were first seen in this row, in order of discovery.
    List<Object> overflow = null;
    synchronized (dimensionDescs) {
        // Sized for the dimensions known before this row; new ones go to 'overflow'.
        dims = new Object[dimensionDescs.size()];
        for (String dimension : rowDimensions) {
            boolean wasNewDim = false;
            ColumnCapabilitiesImpl capabilities;
            DimensionDesc desc = dimensionDescs.get(dimension);
            if (desc != null) {
                capabilities = desc.getCapabilities();
            } else {
                wasNewDim = true;
                capabilities = columnCapabilities.get(dimension);
                if (capabilities == null) {
                    capabilities = new ColumnCapabilitiesImpl();
                    // For schemaless type discovery, assume everything is a String for now, can change later.
                    capabilities.setType(ValueType.STRING);
                    capabilities.setDictionaryEncoded(true);
                    capabilities.setHasBitmapIndexes(true);
                    columnCapabilities.put(dimension, capabilities);
                }
                DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
                desc = addNewDimension(dimension, capabilities, handler);
            }
            DimensionHandler handler = desc.getHandler();
            DimensionIndexer indexer = desc.getIndexer();
            Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension));
            // Set column capabilities as data is coming in
            if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
                capabilities.setHasMultipleValues(true);
            }
            if (wasNewDim) {
                if (overflow == null) {
                    overflow = Lists.newArrayList();
                }
                overflow.add(dimsKey);
            } else if (desc.getIndex() >= dims.length || dims[desc.getIndex()] != null) {
                /*
                 * dims is sized from the dimensions known before this row was processed, so an
                 * already-registered dimension whose index is >= dims.length can only have been
                 * registered earlier in this same row: it is a duplicate. The comparison must be
                 * '>=' because the first dimension added during this row gets an index equal to
                 * dims.length, and indexing dims with it would throw
                 * ArrayIndexOutOfBoundsException instead of the ISE below.
                 *
                 * If the index is < dims.length the dimension was seen on a previous input row,
                 * thus it is safe to index into dims. Finding a value already stored at that
                 * position means the dimension has occurred before on this input row.
                 */
                throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
            } else {
                dims[desc.getIndex()] = dimsKey;
            }
        }
    }
    if (overflow != null) {
        // Merge overflow and non-overflow
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        }
        dims = newDims;
    }
    // A row without a timestamp object keeps 0 here and is lifted to minTimestamp below.
    long truncated = 0;
    if (row.getTimestamp() != null) {
        truncated = gran.bucketStart(row.getTimestamp()).getMillis();
    }
    return new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList);
}
Also used : DimensionHandler(io.druid.segment.DimensionHandler) IAE(io.druid.java.util.common.IAE) DateTime(org.joda.time.DateTime) DimensionIndexer(io.druid.segment.DimensionIndexer) ISE(io.druid.java.util.common.ISE) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with DimensionIndexer

use of io.druid.segment.DimensionIndexer in project druid by druid-io.

the class IncrementalIndexAdapter method getRows.

/**
 * Exposes the facts of the index as {@code Rowboat}s.
 *
 * Each fact's dimension values are converted from unsorted to sorted encoded form by the
 * matching indexer, and its metric values are read from the index at the fact's row offset.
 *
 * @return an iterable whose every iterator numbers its rows starting from 0
 */
@Override
public Iterable<Rowboat> getRows() {
    return new Iterable<Rowboat>() {

        @Override
        public Iterator<Rowboat> iterator() {
            final List<IncrementalIndex.DimensionDesc> dimDescs = index.getDimensions();
            final int numDims = dimDescs.size();
            // Handlers and indexers addressed by each dimension's position in the key.
            final DimensionHandler[] handlersByPosition = new DimensionHandler[numDims];
            final DimensionIndexer[] indexersByPosition = new DimensionIndexer[numDims];
            for (IncrementalIndex.DimensionDesc desc : dimDescs) {
                final int position = desc.getIndex();
                handlersByPosition[position] = desc.getHandler();
                indexersByPosition[position] = desc.getIndexer();
            }
            // The transform function keeps a running row number, so a fresh function (and
            // therefore a fresh counter starting at 0) is created for every iterator() call.
            return Iterators.transform(index.getFacts().entrySet().iterator(), new Function<Map.Entry<IncrementalIndex.TimeAndDims, Integer>, Rowboat>() {

                int rowNum = 0;

                @Override
                public Rowboat apply(Map.Entry<IncrementalIndex.TimeAndDims, Integer> input) {
                    final IncrementalIndex.TimeAndDims key = input.getKey();
                    final Object[] unsortedVals = key.getDims();
                    final int rowOffset = input.getValue();

                    final Object[] sortedVals = new Object[unsortedVals.length];
                    for (IncrementalIndex.DimensionDesc desc : dimDescs) {
                        final int position = desc.getIndex();
                        // Positions beyond this key, or holding no value, stay null.
                        if (position < unsortedVals.length && unsortedVals[position] != null) {
                            final DimensionIndexer indexer = indexersByPosition[position];
                            sortedVals[position] = indexer.convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(unsortedVals[position]);
                        }
                    }

                    final Object[] metricVals = new Object[index.getMetricAggs().length];
                    for (int m = 0; m < metricVals.length; m++) {
                        metricVals[m] = index.getMetricObjectValue(rowOffset, m);
                    }
                    return new Rowboat(key.getTimestamp(), sortedVals, metricVals, rowNum++, handlersByPosition);
                }
            });
        }
    };
}
Also used : DimensionHandler(io.druid.segment.DimensionHandler) DimensionIndexer(io.druid.segment.DimensionIndexer) Map(java.util.Map) Rowboat(io.druid.segment.Rowboat)

Aggregations

DimensionIndexer (io.druid.segment.DimensionIndexer)8 DimensionHandler (io.druid.segment.DimensionHandler)2 ColumnCapabilities (io.druid.segment.column.ColumnCapabilities)2 DateTime (org.joda.time.DateTime)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 MutableBitmap (io.druid.collections.bitmap.MutableBitmap)1 IAE (io.druid.java.util.common.IAE)1 ISE (io.druid.java.util.common.ISE)1 DimensionSpec (io.druid.query.dimension.DimensionSpec)1 ExtractionFn (io.druid.query.extraction.ExtractionFn)1 ValueMatcher (io.druid.query.filter.ValueMatcher)1 RuntimeShapeInspector (io.druid.query.monomorphicprocessing.RuntimeShapeInspector)1 Cursor (io.druid.segment.Cursor)1 DimensionSelector (io.druid.segment.DimensionSelector)1 FloatColumnSelector (io.druid.segment.FloatColumnSelector)1 FloatWrappingDimensionSelector (io.druid.segment.FloatWrappingDimensionSelector)1 LongColumnSelector (io.druid.segment.LongColumnSelector)1 LongWrappingDimensionSelector (io.druid.segment.LongWrappingDimensionSelector)1 NullDimensionSelector (io.druid.segment.NullDimensionSelector)1 ObjectColumnSelector (io.druid.segment.ObjectColumnSelector)1