Example 1 with DruidTable

Use of io.druid.sql.calcite.table.DruidTable in project druid (druid-io).

From the class DruidSchema, method start:

@LifecycleStart
public void start() {
    cacheExec.submit(new Runnable() {

        @Override
        public void run() {
            try {
                while (!Thread.currentThread().isInterrupted()) {
                    final Set<String> dataSources = Sets.newHashSet();
                    try {
                        synchronized (lock) {
                            final long nextRefresh = new DateTime(lastRefresh).plus(config.getMetadataRefreshPeriod()).getMillis();
                            while (!(isServerViewInitialized && !dataSourcesNeedingRefresh.isEmpty() && (refreshImmediately || nextRefresh < System.currentTimeMillis()))) {
                                lock.wait(Math.max(1, nextRefresh - System.currentTimeMillis()));
                            }
                            dataSources.addAll(dataSourcesNeedingRefresh);
                            dataSourcesNeedingRefresh.clear();
                            lastRefresh = System.currentTimeMillis();
                            refreshImmediately = false;
                        }
                        // Refresh dataSources.
                        for (final String dataSource : dataSources) {
                            log.debug("Refreshing metadata for dataSource[%s].", dataSource);
                            final long startTime = System.currentTimeMillis();
                            final DruidTable druidTable = computeTable(dataSource);
                            if (druidTable == null) {
                                if (tables.remove(dataSource) != null) {
                                    log.info("Removed dataSource[%s] from the list of active dataSources.", dataSource);
                                }
                            } else {
                                tables.put(dataSource, druidTable);
                                log.info("Refreshed metadata for dataSource[%s] in %,dms.", dataSource, System.currentTimeMillis() - startTime);
                            }
                        }
                        initializationLatch.countDown();
                    } catch (InterruptedException e) {
                        // Fall through.
                        throw e;
                    } catch (Exception e) {
                        log.warn(e, "Metadata refresh failed for dataSources[%s], trying again soon.", Joiner.on(", ").join(dataSources));
                        synchronized (lock) {
                            // Add dataSources back to the refresh list.
                            dataSourcesNeedingRefresh.addAll(dataSources);
                            lock.notifyAll();
                        }
                    }
                }
            } catch (InterruptedException e) {
            // Just exit.
            } catch (Throwable e) {
                // Throwables that fall out to here (not caught by an inner try/catch) are potentially gnarly, like
                // OOMEs. Anyway, let's just emit an alert and stop refreshing metadata.
                log.makeAlert(e, "Metadata refresh failed permanently").emit();
                throw e;
            } finally {
                log.info("Metadata refresh stopped.");
            }
        }
    });
    serverView.registerSegmentCallback(MoreExecutors.sameThreadExecutor(), new ServerView.SegmentCallback() {

        @Override
        public ServerView.CallbackAction segmentViewInitialized() {
            synchronized (lock) {
                isServerViewInitialized = true;
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }

        @Override
        public ServerView.CallbackAction segmentAdded(DruidServerMetadata server, DataSegment segment) {
            synchronized (lock) {
                dataSourcesNeedingRefresh.add(segment.getDataSource());
                if (!tables.containsKey(segment.getDataSource())) {
                    refreshImmediately = true;
                }
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }

        @Override
        public ServerView.CallbackAction segmentRemoved(DruidServerMetadata server, DataSegment segment) {
            synchronized (lock) {
                dataSourcesNeedingRefresh.add(segment.getDataSource());
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }
    });
    serverView.registerServerCallback(MoreExecutors.sameThreadExecutor(), new ServerView.ServerCallback() {

        @Override
        public ServerView.CallbackAction serverRemoved(DruidServer server) {
            final List<String> dataSourceNames = Lists.newArrayList();
            for (DruidDataSource druidDataSource : server.getDataSources()) {
                dataSourceNames.add(druidDataSource.getName());
            }
            synchronized (lock) {
                dataSourcesNeedingRefresh.addAll(dataSourceNames);
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }
    });
}
Also used: EnumSet (java.util.EnumSet), Set (java.util.Set), DruidTable (io.druid.sql.calcite.table.DruidTable), DruidServer (io.druid.client.DruidServer), DruidServerMetadata (io.druid.server.coordination.DruidServerMetadata), DataSegment (io.druid.timeline.DataSegment), DruidDataSource (io.druid.client.DruidDataSource), DateTime (org.joda.time.DateTime), ServerView (io.druid.client.ServerView), TimelineServerView (io.druid.client.TimelineServerView), List (java.util.List), LifecycleStart (io.druid.java.util.common.lifecycle.LifecycleStart)
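
The loop above is a guarded wait: the segment and server callbacks add names to dataSourcesNeedingRefresh and call lock.notifyAll(), while the refresh thread waits on the same lock until its predicate holds, then drains the set atomically. A minimal, self-contained sketch of that pattern in plain Java follows; the names PendingWork, enqueue, and drain are hypothetical, not Druid APIs.

import java.util.HashSet;
import java.util.Set;

public class PendingWork {

    private final Object lock = new Object();
    private final Set<String> pending = new HashSet<>();

    // Producers (analogous to the segment callbacks) add work and wake the consumer.
    public void enqueue(String item) {
        synchronized (lock) {
            pending.add(item);
            lock.notifyAll();
        }
    }

    // The consumer (analogous to the refresh thread) blocks until work arrives,
    // then drains the set atomically so producers can keep adding concurrently.
    public Set<String> drain() throws InterruptedException {
        synchronized (lock) {
            while (pending.isEmpty()) {
                lock.wait();
            }
            final Set<String> drained = new HashSet<>(pending);
            pending.clear();
            return drained;
        }
    }
}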

Example 2 with DruidTable

Use of io.druid.sql.calcite.table.DruidTable in project druid (druid-io).

From the class DruidSchema, method computeTable:

private DruidTable computeTable(final String dataSource) {
    final SegmentMetadataQuery segmentMetadataQuery = new SegmentMetadataQuery(
        new TableDataSource(dataSource),
        null,
        null,
        false,
        ImmutableMap.<String, Object>of("useCache", false, "populateCache", false),
        EnumSet.of(SegmentMetadataQuery.AnalysisType.INTERVAL),
        null,
        true
    );
    final Sequence<SegmentAnalysis> sequence = segmentMetadataQuery.run(walker, Maps.<String, Object>newHashMap());
    final List<SegmentAnalysis> results = Sequences.toList(sequence, Lists.<SegmentAnalysis>newArrayList());
    if (results.isEmpty()) {
        return null;
    }
    final Map<String, ValueType> columnTypes = Maps.newLinkedHashMap();
    // Resolve conflicts by taking the latest metadata. This aids in gradual schema evolution.
    long maxTimestamp = JodaUtils.MIN_INSTANT;
    for (SegmentAnalysis analysis : results) {
        final long timestamp;
        if (analysis.getIntervals() != null && analysis.getIntervals().size() > 0) {
            timestamp = analysis.getIntervals().get(analysis.getIntervals().size() - 1).getEndMillis();
        } else {
            timestamp = JodaUtils.MIN_INSTANT;
        }
        for (Map.Entry<String, ColumnAnalysis> entry : analysis.getColumns().entrySet()) {
            if (entry.getValue().isError()) {
                // Skip columns with analysis errors.
                continue;
            }
            if (!columnTypes.containsKey(entry.getKey()) || timestamp >= maxTimestamp) {
                ValueType valueType;
                try {
                    valueType = ValueType.valueOf(entry.getValue().getType().toUpperCase());
                } catch (IllegalArgumentException e) {
                    // Assume unrecognized types are some flavor of COMPLEX. This throws away information about exactly
                    // what kind of complex column it is, which we may want to preserve some day.
                    valueType = ValueType.COMPLEX;
                }
                columnTypes.put(entry.getKey(), valueType);
                maxTimestamp = timestamp;
            }
        }
    }
    final RowSignature.Builder rowSignature = RowSignature.builder();
    for (Map.Entry<String, ValueType> entry : columnTypes.entrySet()) {
        rowSignature.add(entry.getKey(), entry.getValue());
    }
    return new DruidTable(new TableDataSource(dataSource), rowSignature.build());
}
Also used: ValueType (io.druid.segment.column.ValueType), DruidTable (io.druid.sql.calcite.table.DruidTable), TableDataSource (io.druid.query.TableDataSource), SegmentMetadataQuery (io.druid.query.metadata.metadata.SegmentMetadataQuery), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis), ConcurrentMap (java.util.concurrent.ConcurrentMap), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), RowSignature (io.druid.sql.calcite.table.RowSignature)
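
computeTable resolves conflicting column types across segments by letting the most recent segment win, which supports gradual schema evolution. A minimal sketch of that merge policy in isolation; ColumnObservation, SchemaMerge, and mergeColumns are hypothetical names, not part of Druid.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SchemaMerge {

    // One column type reported by one segment, stamped with the end time of
    // that segment's interval (analogous to a SegmentAnalysis entry above).
    public static class ColumnObservation {
        final String column;
        final String type;
        final long timestamp;

        public ColumnObservation(String column, String type, long timestamp) {
            this.column = column;
            this.type = type;
            this.timestamp = timestamp;
        }
    }

    // Accept a type if the column is new, or if this observation is at least
    // as recent as the newest one accepted so far ("latest metadata wins").
    public static Map<String, String> mergeColumns(List<ColumnObservation> observations) {
        final Map<String, String> columnTypes = new LinkedHashMap<>();
        long maxTimestamp = Long.MIN_VALUE;
        for (ColumnObservation obs : observations) {
            if (!columnTypes.containsKey(obs.column) || obs.timestamp >= maxTimestamp) {
                columnTypes.put(obs.column, obs.type);
                maxTimestamp = obs.timestamp;
            }
        }
        return columnTypes;
    }
}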

Example 3 with DruidTable

Use of io.druid.sql.calcite.table.DruidTable in project druid (druid-io).

From the class SqlBenchmark, method setup:

@Setup(Level.Trial)
public void setup() throws Exception {
    tmpDir = Files.createTempDir();
    log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", tmpDir, rowsPerSegment);
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
        ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
    }
    final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic");
    final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + 1, schemaInfo.getDataInterval(), rowsPerSegment);
    final List<InputRow> rows = Lists.newArrayList();
    for (int i = 0; i < rowsPerSegment; i++) {
        final InputRow row = dataGenerator.nextRow();
        if (i % 20000 == 0) {
            log.info("%,d/%,d rows generated.", i, rowsPerSegment);
        }
        rows.add(row);
    }
    log.info("%,d/%,d rows generated.", rows.size(), rowsPerSegment);
    final PlannerConfig plannerConfig = new PlannerConfig();
    final QueryRunnerFactoryConglomerate conglomerate = CalciteTests.queryRunnerFactoryConglomerate();
    final QueryableIndex index = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "1"))
        .indexMerger(TestHelper.getTestIndexMergerV9())
        .rows(rows)
        .buildMMappedIndex();
    this.walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
        DataSegment.builder()
            .dataSource("foo")
            .interval(index.getDataInterval())
            .version("1")
            .shardSpec(new LinearShardSpec(0))
            .build(),
        index
    );
    final Map<String, Table> tableMap = ImmutableMap.<String, Table>of(
        "foo",
        new DruidTable(
            new TableDataSource("foo"),
            RowSignature.builder()
                .add("__time", ValueType.LONG)
                .add("dimSequential", ValueType.STRING)
                .add("dimZipf", ValueType.STRING)
                .add("dimUniform", ValueType.STRING)
                .build()
        )
    );
    final Schema druidSchema = new AbstractSchema() {

        @Override
        protected Map<String, Table> getTableMap() {
            return tableMap;
        }
    };
    plannerFactory = new PlannerFactory(Calcites.createRootSchema(druidSchema), walker, CalciteTests.createOperatorTable(), plannerConfig);
    groupByQuery = GroupByQuery.builder()
        .setDataSource("foo")
        .setInterval(new Interval(JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT))
        .setDimensions(
            Arrays.<DimensionSpec>asList(
                new DefaultDimensionSpec("dimZipf", "d0"),
                new DefaultDimensionSpec("dimSequential", "d1")
            )
        )
        .setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("c")))
        .setGranularity(Granularities.ALL)
        .build();
    sqlQuery = "SELECT\n"
        + "  dimZipf AS d0,\n"
        + "  dimSequential AS d1,\n"
        + "  COUNT(*) AS c\n"
        + "FROM druid.foo\n"
        + "GROUP BY dimZipf, dimSequential";
}
Also used: DruidTable (io.druid.sql.calcite.table.DruidTable), Table (org.apache.calcite.schema.Table), LinearShardSpec (io.druid.timeline.partition.LinearShardSpec), Schema (org.apache.calcite.schema.Schema), AbstractSchema (org.apache.calcite.schema.impl.AbstractSchema), BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator), HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), QueryRunnerFactoryConglomerate (io.druid.query.QueryRunnerFactoryConglomerate), SpecificSegmentsQuerySegmentWalker (io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker), TableDataSource (io.druid.query.TableDataSource), QueryableIndex (io.druid.segment.QueryableIndex), BenchmarkSchemaInfo (io.druid.benchmark.datagen.BenchmarkSchemaInfo), PlannerConfig (io.druid.sql.calcite.planner.PlannerConfig), InputRow (io.druid.data.input.InputRow), PlannerFactory (io.druid.sql.calcite.planner.PlannerFactory), File (java.io.File), Interval (org.joda.time.Interval), Setup (org.openjdk.jmh.annotations.Setup)
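
The @Setup(Level.Trial) method above runs once per benchmark trial, keeping the expensive work (generating rows, building the index, creating the planner) out of the measured code. A minimal JMH skeleton showing how such a setup method pairs with @Benchmark and @TearDown; the class name and method bodies here are placeholders, not part of SqlBenchmark.

import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;

@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class SketchBenchmark {

    private String sqlQuery;

    @Setup(Level.Trial)
    public void setup() {
        // One-time, unmeasured work goes here, as in SqlBenchmark.setup above.
        sqlQuery = "SELECT COUNT(*) FROM druid.foo";
    }

    @Benchmark
    public String plan() {
        // The real benchmark would plan/execute sqlQuery via the planner factory.
        // Returning the value prevents JMH from dead-code-eliminating the call.
        return sqlQuery;
    }

    @TearDown(Level.Trial)
    public void tearDown() {
        // Cleanup (e.g. deleting tmpDir, closing the walker) would go here.
    }
}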

Example 4 with DruidTable

Use of io.druid.sql.calcite.table.DruidTable in project druid (druid-io).

From the class DruidTableScanRule, method onMatch:

@Override
public void onMatch(final RelOptRuleCall call) {
    final LogicalTableScan scan = call.rel(0);
    final RelOptTable table = scan.getTable();
    final DruidTable druidTable = table.unwrap(DruidTable.class);
    if (druidTable != null) {
        call.transformTo(DruidQueryRel.fullScan(scan.getCluster(), table, druidTable, queryMaker));
    }
}
Also used: DruidTable (io.druid.sql.calcite.table.DruidTable), RelOptTable (org.apache.calcite.plan.RelOptTable), LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan)
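
onMatch relies on Calcite's unwrap pattern: the rule fires on any LogicalTableScan, asks the scanned RelOptTable for its engine-specific implementation, and quietly does nothing when the table is not one. A minimal sketch of the same rule shape, where CustomTable and CustomTableScanRule are hypothetical stand-ins for DruidTable and DruidTableScanRule.

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.logical.LogicalTableScan;

public class CustomTableScanRule extends RelOptRule {

    public CustomTableScanRule() {
        // Fire on every LogicalTableScan; filtering happens in onMatch.
        super(operand(LogicalTableScan.class, any()));
    }

    @Override
    public void onMatch(RelOptRuleCall call) {
        final LogicalTableScan scan = call.rel(0);
        final CustomTable table = scan.getTable().unwrap(CustomTable.class);
        if (table != null) {
            // Replace the logical scan with an engine-specific rel here, as
            // DruidTableScanRule does with DruidQueryRel.fullScan(...).
        }
    }

    // Placeholder for an engine-specific table class, analogous to DruidTable.
    interface CustomTable {
    }
}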

Example 5 with DruidTable

Use of io.druid.sql.calcite.table.DruidTable in project druid (druid-io).

From the class DruidSchemaTest, method testGetTableMap:

@Test
public void testGetTableMap() {
    Assert.assertEquals(ImmutableSet.of("foo", "foo2"), schema.getTableNames());
    final Map<String, Table> tableMap = schema.getTableMap();
    Assert.assertEquals(ImmutableSet.of("foo", "foo2"), tableMap.keySet());
    final DruidTable fooTable = (DruidTable) tableMap.get("foo");
    final RelDataType rowType = fooTable.getRowType(new JavaTypeFactoryImpl());
    final List<RelDataTypeField> fields = rowType.getFieldList();
    Assert.assertEquals(6, fields.size());
    Assert.assertEquals("__time", fields.get(0).getName());
    Assert.assertEquals(SqlTypeName.TIMESTAMP, fields.get(0).getType().getSqlTypeName());
    Assert.assertEquals("cnt", fields.get(1).getName());
    Assert.assertEquals(SqlTypeName.BIGINT, fields.get(1).getType().getSqlTypeName());
    Assert.assertEquals("dim1", fields.get(2).getName());
    Assert.assertEquals(SqlTypeName.VARCHAR, fields.get(2).getType().getSqlTypeName());
    Assert.assertEquals("m1", fields.get(3).getName());
    Assert.assertEquals(SqlTypeName.BIGINT, fields.get(3).getType().getSqlTypeName());
    Assert.assertEquals("unique_dim1", fields.get(4).getName());
    Assert.assertEquals(SqlTypeName.OTHER, fields.get(4).getType().getSqlTypeName());
    Assert.assertEquals("dim2", fields.get(5).getName());
    Assert.assertEquals(SqlTypeName.VARCHAR, fields.get(5).getType().getSqlTypeName());
}
Also used: RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField), DruidTable (io.druid.sql.calcite.table.DruidTable), Table (org.apache.calcite.schema.Table), JavaTypeFactoryImpl (org.apache.calcite.jdbc.JavaTypeFactoryImpl), RelDataType (org.apache.calcite.rel.type.RelDataType), Test (org.junit.Test)
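
The assertions above exercise Calcite's row-type machinery: DruidTable.getRowType translates the Druid row signature into a RelDataType via the supplied type factory. A minimal sketch of building and inspecting such a row type directly with JavaTypeFactoryImpl, independent of Druid; the class name RowTypeSketch is hypothetical.

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.type.SqlTypeName;

public class RowTypeSketch {

    public static void main(String[] args) {
        final JavaTypeFactoryImpl typeFactory = new JavaTypeFactoryImpl();
        // Build a row type similar to the one the test inspects.
        final RelDataType rowType = typeFactory.builder()
            .add("__time", SqlTypeName.TIMESTAMP)
            .add("cnt", SqlTypeName.BIGINT)
            .add("dim1", SqlTypeName.VARCHAR)
            .build();
        // Read back field names and SQL types, as the assertions above do.
        for (RelDataTypeField field : rowType.getFieldList()) {
            System.out.println(field.getName() + " -> " + field.getType().getSqlTypeName());
        }
    }
}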

Aggregations

DruidTable (io.druid.sql.calcite.table.DruidTable): 5
TableDataSource (io.druid.query.TableDataSource): 2
Table (org.apache.calcite.schema.Table): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 1
BenchmarkSchemaInfo (io.druid.benchmark.datagen.BenchmarkSchemaInfo): 1
DruidDataSource (io.druid.client.DruidDataSource): 1
DruidServer (io.druid.client.DruidServer): 1
ServerView (io.druid.client.ServerView): 1
TimelineServerView (io.druid.client.TimelineServerView): 1
InputRow (io.druid.data.input.InputRow): 1
LifecycleStart (io.druid.java.util.common.lifecycle.LifecycleStart): 1
QueryRunnerFactoryConglomerate (io.druid.query.QueryRunnerFactoryConglomerate): 1
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 1
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 1
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 1
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 1
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 1
SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis): 1
SegmentMetadataQuery (io.druid.query.metadata.metadata.SegmentMetadataQuery): 1