Example 11 with ColumnAnalysis

Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.

The class SegmentAnalyzer, method analyze.

public Map<String, ColumnAnalysis> analyze(Segment segment) {
    Preconditions.checkNotNull(segment, "segment");
    // index is null for incremental-index-based segments, but storageAdapter is always available
    final QueryableIndex index = segment.asQueryableIndex();
    final StorageAdapter storageAdapter = segment.asStorageAdapter();
    // get length and column names from storageAdapter
    final int length = storageAdapter.getNumRows();
    final Set<String> columnNames = Sets.newHashSet();
    Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
    Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());
    Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
    for (String columnName : columnNames) {
        final Column column = index == null ? null : index.getColumn(columnName);
        final ColumnCapabilities capabilities = column != null ? column.getCapabilities() : storageAdapter.getColumnCapabilities(columnName);
        final ColumnAnalysis analysis;
        final ValueType type = capabilities.getType();
        switch(type) {
            case LONG:
                analysis = analyzeNumericColumn(capabilities, length, Longs.BYTES);
                break;
            case FLOAT:
                analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case STRING:
                if (index != null) {
                    analysis = analyzeStringColumn(capabilities, column);
                } else {
                    analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
                }
                break;
            case COMPLEX:
                analysis = analyzeComplexColumn(capabilities, column, storageAdapter.getColumnTypeName(columnName));
                break;
            default:
                log.warn("Unknown column type[%s].", type);
                analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
        }
        columns.put(columnName, analysis);
    }
    // Add time column too
    ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
    if (timeCapabilities == null) {
        timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
    }
    columns.put(Column.TIME_COLUMN_NAME, analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP));
    return columns;
}
Also used: ComplexColumn (io.druid.segment.column.ComplexColumn), Column (io.druid.segment.column.Column), ValueType (io.druid.segment.column.ValueType), QueryableIndex (io.druid.segment.QueryableIndex), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), StorageAdapter (io.druid.segment.StorageAdapter), ColumnCapabilities (io.druid.segment.column.ColumnCapabilities), ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl)
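
For orientation, here is a minimal usage sketch, not taken from the Druid sources: the EnumSet-based constructor and the getter names on ColumnAnalysis are assumptions for this Druid version, and segment stands in for an already-opened Segment.

// Minimal sketch (assumptions noted above): analyze a segment and print
// the per-column results produced by the analyze() method shown above.
SegmentAnalyzer analyzer = new SegmentAnalyzer(
    EnumSet.of(
        SegmentMetadataQuery.AnalysisType.CARDINALITY,
        SegmentMetadataQuery.AnalysisType.SIZE));
Map<String, ColumnAnalysis> columns = analyzer.analyze(segment);
for (Map.Entry<String, ColumnAnalysis> entry : columns.entrySet()) {
    // getType() reports the ValueType name as a String; getSize() an estimated byte size
    System.out.printf("%s: type=%s size=%d%n",
        entry.getKey(), entry.getValue().getType(), entry.getValue().getSize());
}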

Example 12 with ColumnAnalysis

Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.

The class SegmentAnalyzerTest, method testIncrementalWorksHelper.

private void testIncrementalWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception {
    final List<SegmentAnalysis> results = getSegmentAnalysises(new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(), null), analyses);
    Assert.assertEquals(1, results.size());
    final SegmentAnalysis analysis = results.get(0);
    Assert.assertEquals(null, analysis.getId());
    final Map<String, ColumnAnalysis> columns = analysis.getColumns();
    Assert.assertEquals(TestIndex.COLUMNS.length, columns.size());
    for (DimensionSchema schema : TestIndex.DIMENSION_SCHEMAS) {
        final String dimension = schema.getName();
        final ColumnAnalysis columnAnalysis = columns.get(dimension);
        final boolean isString = schema.getValueType().name().equals(ValueType.STRING.name());
        Assert.assertEquals(dimension, schema.getValueType().name(), columnAnalysis.getType());
        Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
        if (isString) {
            if (analyses == null) {
                Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
            } else {
                Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
            }
        } else {
            Assert.assertNull(dimension, columnAnalysis.getCardinality());
        }
    }
    for (String metric : TestIndex.METRICS) {
        final ColumnAnalysis columnAnalysis = columns.get(metric);
        Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
        Assert.assertNull(metric, columnAnalysis.getCardinality());
    }
}
Also used: IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis), DimensionSchema (io.druid.data.input.impl.DimensionSchema)
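
Tying this test back to Example 11: an IncrementalIndexSegment exposes no QueryableIndex, so the analyzer takes the storageAdapter branches. A short sketch under that assumption, with the analysis-type set chosen purely for illustration:

// Sketch: wrap the incremental test index as a Segment and feed it to the
// analyzer from Example 11. asQueryableIndex() returns null for this segment
// type, so STRING columns go through the storageAdapter-based overload.
Segment segment = new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(), null);
SegmentAnalyzer analyzer = new SegmentAnalyzer(
    EnumSet.of(SegmentMetadataQuery.AnalysisType.CARDINALITY));
Map<String, ColumnAnalysis> columns = analyzer.analyze(segment);
// Per the assertions above: dimensions come back as STRING with a cardinality,
// metrics as FLOAT with a null cardinality.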

Example 13 with ColumnAnalysis

Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.

The class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithDefaultAnalysisMerge2.

@Test
public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() {
    ColumnAnalysis analysis = new ColumnAnalysis(ValueType.STRING.toString(), false, (mmap1 ? 6882 : 6808) + (mmap2 ? 6882 : 6808), 3, "spot", "upfront", null);
    testSegmentMetadataQueryWithDefaultAnalysisMerge("market", analysis);
}
Also used: ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), Test (org.junit.Test)
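
The size expression encodes that merging sums the per-segment sizes: a memory-mapped segment reports 6882 bytes for the market column and an incremental one 6808, so two mmapped segments merge to 6882 + 6882 = 13764. A sketch of that merge via ColumnAnalysis.fold follows; the fold semantics in the comments are inferred from this test's expectations, not quoted from the source.

// Illustrative sketch of folding two per-segment analyses into a merged one.
ColumnAnalysis left = new ColumnAnalysis(
    ValueType.STRING.toString(), false, 6882, 3, "spot", "upfront", null);
ColumnAnalysis right = new ColumnAnalysis(
    ValueType.STRING.toString(), false, 6882, 3, "spot", "upfront", null);
ColumnAnalysis merged = left.fold(right);
// Expected per this test: sizes add (6882 + 6882 = 13764), cardinality takes
// the max (3), and minValue/maxValue keep the overall extremes ("spot", "upfront").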

Example 14 with ColumnAnalysis

Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.

The class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithTimestampSpecMerge.

@Test
public void testSegmentMetadataQueryWithTimestampSpecMerge() {
    SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
        differentIds ? "merged" : "testSegment",
        null,
        ImmutableMap.of("placement", new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 0, null, null, null)),
        0,
        expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
        null,
        new TimestampSpec("ds", "auto", null),
        null,
        null);
    QueryToolChest toolChest = FACTORY.getToolchest();
    ExecutorService exec = Executors.newCachedThreadPool();
    QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            FACTORY.mergeRunners(
                MoreExecutors.sameThreadExecutor(),
                Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                    toolChest.preMergeQueryDecoration(runner1),
                    toolChest.preMergeQueryDecoration(runner2)))),
        toolChest);
    TestHelper.assertExpectedObjects(
        ImmutableList.of(mergedSegmentAnalysis),
        myRunner.run(
            Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC)
                .merge(true)
                .build(),
            Maps.newHashMap()),
        "failed SegmentMetadata merging query");
    exec.shutdownNow();
}
Also used: FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner), ListColumnIncluderator (io.druid.query.metadata.metadata.ListColumnIncluderator), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), TimestampSpec (io.druid.data.input.impl.TimestampSpec), ExecutorService (java.util.concurrent.ExecutorService), SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis), QueryToolChest (io.druid.query.QueryToolChest), QueryRunner (io.druid.query.QueryRunner), Test (org.junit.Test)

Example 15 with ColumnAnalysis

Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.

The class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithAggregatorsMerge.

@Test
public void testSegmentMetadataQueryWithAggregatorsMerge() {
    final Map<String, AggregatorFactory> expectedAggregators = Maps.newHashMap();
    for (AggregatorFactory agg : TestIndex.METRIC_AGGS) {
        expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
    }
    SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
        differentIds ? "merged" : "testSegment",
        null,
        ImmutableMap.of("placement", new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 0, null, null, null)),
        0,
        expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
        expectedAggregators,
        null,
        null,
        null);
    QueryToolChest toolChest = FACTORY.getToolchest();
    ExecutorService exec = Executors.newCachedThreadPool();
    QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            FACTORY.mergeRunners(
                MoreExecutors.sameThreadExecutor(),
                Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                    toolChest.preMergeQueryDecoration(runner1),
                    toolChest.preMergeQueryDecoration(runner2)))),
        toolChest);
    TestHelper.assertExpectedObjects(
        ImmutableList.of(mergedSegmentAnalysis),
        myRunner.run(
            Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS)
                .merge(true)
                .build(),
            Maps.newHashMap()),
        "failed SegmentMetadata merging query");
    exec.shutdownNow();
}
Also used: FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner), ListColumnIncluderator (io.druid.query.metadata.metadata.ListColumnIncluderator), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), ExecutorService (java.util.concurrent.ExecutorService), SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis), QueryToolChest (io.druid.query.QueryToolChest), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), QueryRunner (io.druid.query.QueryRunner), Test (org.junit.Test)
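
Examples 14 and 15 repeat the same runner wiring verbatim; a hypothetical refactoring could extract it into a helper like the one below (the helper name is illustrative and not part of the Druid test class):

// Hypothetical helper extracting the merge-runner plumbing shared by
// Examples 14 and 15; FACTORY, runner1 and runner2 come from the test class.
private QueryRunner<SegmentAnalysis> mergedRunner(QueryToolChest toolChest) {
    return new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            FACTORY.mergeRunners(
                MoreExecutors.sameThreadExecutor(),
                Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                    toolChest.preMergeQueryDecoration(runner1),
                    toolChest.preMergeQueryDecoration(runner2)))),
        toolChest);
}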

Aggregations

ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 23 usages
SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis): 16 usages
Test (org.junit.Test): 12 usages
QueryRunner (io.druid.query.QueryRunner): 9 usages
ListColumnIncluderator (io.druid.query.metadata.metadata.ListColumnIncluderator): 9 usages
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 8 usages
QueryToolChest (io.druid.query.QueryToolChest): 8 usages
ExecutorService (java.util.concurrent.ExecutorService): 8 usages
SegmentMetadataQuery (io.druid.query.metadata.metadata.SegmentMetadataQuery): 6 usages
Interval (org.joda.time.Interval): 4 usages
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 3 usages
Query (io.druid.query.Query): 3 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 usages
Map (java.util.Map): 3 usages
DimensionSchema (io.druid.data.input.impl.DimensionSchema): 2 usages
Granularity (io.druid.java.util.common.granularity.Granularity): 2 usages
TableDataSource (io.druid.query.TableDataSource): 2 usages
ComplexColumn (io.druid.segment.column.ComplexColumn): 2 usages
ValueType (io.druid.segment.column.ValueType): 2 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1 usage