Example 36 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class SpatialFilterTest, method makeMergedQueryableIndex.

private static QueryableIndex makeMergedQueryableIndex(IndexSpec indexSpec) {
    try {
        // All three incremental indexes share the same schema (daily granularity plus two
        // spatial dimension groups); only the max row count differs for the third index.
        final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
                .withMinTimestamp(DATA_INTERVAL.getStartMillis())
                .withQueryGranularity(Granularities.DAY)
                .withMetrics(METRIC_AGGS)
                .withDimensionsSpec(
                        new DimensionsSpec(
                                null,
                                null,
                                Arrays.asList(
                                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                                        new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2")))))
                .build();
        IncrementalIndex first = new OnheapIncrementalIndex(schema, false, 1000);
        IncrementalIndex second = new OnheapIncrementalIndex(schema, false, 1000);
        IncrementalIndex third = new OnheapIncrementalIndex(schema, false, NUM_POINTS);
        first.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-02").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-03").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-04").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
        // Add a bunch of random points
        Random rand = new Random();
        for (int i = 8; i < NUM_POINTS; i++) {
            third.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "boo", "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
        }
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();
        File firstFile = new File(tmpFile, "first");
        File secondFile = new File(tmpFile, "second");
        File thirdFile = new File(tmpFile, "third");
        File mergedFile = new File(tmpFile, "merged");
        firstFile.mkdirs();
        firstFile.deleteOnExit();
        secondFile.mkdirs();
        secondFile.deleteOnExit();
        thirdFile.mkdirs();
        thirdFile.deleteOnExit();
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();
        INDEX_MERGER.persist(first, DATA_INTERVAL, firstFile, indexSpec);
        INDEX_MERGER.persist(second, DATA_INTERVAL, secondFile, indexSpec);
        INDEX_MERGER.persist(third, DATA_INTERVAL, thirdFile, indexSpec);
        QueryableIndex mergedRealtime = INDEX_IO.loadIndex(
                INDEX_MERGER.mergeQueryableIndex(
                        Arrays.asList(INDEX_IO.loadIndex(firstFile), INDEX_IO.loadIndex(secondFile), INDEX_IO.loadIndex(thirdFile)),
                        true,
                        METRIC_AGGS,
                        mergedFile,
                        indexSpec));
        return mergedRealtime;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex), IOException (java.io.IOException), DateTime (org.joda.time.DateTime), Random (java.util.Random), QueryableIndex (io.druid.segment.QueryableIndex), SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), File (java.io.File)
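
The DimensionsSpec here leaves the regular dimension list and exclusions null and declares only the spatial dimension groups. Below is a minimal sketch of just that construction, assuming the same io.druid API as the test (constructor arguments in the order dimensions, dimensionExclusions, spatialDimensions); the class and method names are illustrative.

import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.SpatialDimensionSchema;

import java.util.Arrays;

public class SpatialDimensionsSpecSketch {
    // Null dimensions and exclusions leave string dimensions to be discovered from the
    // input rows; only the two spatial groups ("dim.geo" built from lat/long and
    // "spatialIsRad" built from lat2/long2) are declared explicitly.
    public static DimensionsSpec spatialSpec() {
        return new DimensionsSpec(
                null,
                null,
                Arrays.asList(
                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                        new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))));
    }
}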

Example 37 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IncrementalIndexTest, method constructorFeeder.

@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
    DimensionsSpec dimensions = new DimensionsSpec(
            Arrays.<DimensionSchema>asList(
                    new StringDimensionSchema("string"),
                    new StringDimensionSchema("float"),
                    new StringDimensionSchema("long")),
            null,
            null);
    AggregatorFactory[] metrics = {
            new FilteredAggregatorFactory(new CountAggregatorFactory("cnt"), new SelectorDimFilter("billy", "A", null))
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(0)
            .withQueryGranularity(Granularities.MINUTE)
            .withDimensionsSpec(dimensions)
            .withMetrics(metrics)
            .withRollup(true)
            .build();
    final List<Object[]> constructors = Lists.newArrayList();
    for (final Boolean sortFacts : ImmutableList.of(false, true)) {
        constructors.add(new Object[] { new IndexCreator() {

            @Override
            public IncrementalIndex createIndex() {
                return new OnheapIncrementalIndex(schema, false, true, sortFacts, 1000);
            }
        } });
        constructors.add(new Object[] { new IndexCreator() {

            @Override
            public IncrementalIndex createIndex() {
                return new OffheapIncrementalIndex(schema, true, true, sortFacts, 1000000, new StupidPool<ByteBuffer>("OffheapIncrementalIndex-bufferPool", new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        return ByteBuffer.allocate(256 * 1024);
                    }
                }));
            }
        } });
    }
    return constructors;
}
Also used : FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), ByteBuffer (java.nio.ByteBuffer), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), SelectorDimFilter (io.druid.query.filter.SelectorDimFilter), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), StupidPool (io.druid.collections.StupidPool)
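
Because every dimension in this example is a string, the same spec could likely also be written with DimensionsSpec.getDefaultSchemas, the helper used in Examples 39 and 40 below. A hedged sketch (illustrative class name; assumes getDefaultSchemas maps plain column names to default string dimension schemas, as the other examples on this page suggest):

import io.druid.data.input.impl.DimensionsSpec;

import java.util.Arrays;

public class DefaultSchemasEquivalentSketch {
    // getDefaultSchemas turns plain column names into default (string) dimension schemas,
    // so this should declare the same three string dimensions as the explicit
    // StringDimensionSchema list above.
    public static DimensionsSpec viaDefaultSchemas() {
        return new DimensionsSpec(
                DimensionsSpec.getDefaultSchemas(Arrays.asList("string", "float", "long")),
                null,
                null);
    }
}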

Example 38 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class OrcHadoopInputRowParserTest, method testSerde.

@Test
public void testSerde() throws IOException {
    String parserString = "{\n" + "        \"type\": \"orc\",\n" + "        \"parseSpec\": {\n" + "          \"format\": \"timeAndDims\",\n" + "          \"timestampSpec\": {\n" + "            \"column\": \"timestamp\",\n" + "            \"format\": \"auto\"\n" + "          },\n" + "          \"dimensionsSpec\": {\n" + "            \"dimensions\": [\n" + "              \"col1\",\n" + "              \"col2\"\n" + "            ],\n" + "            \"dimensionExclusions\": [],\n" + "            \"spatialDimensions\": []\n" + "          }\n" + "        },\n" + "        \"typeString\": \"struct<timestamp:string,col1:string,col2:array<string>,val1:float>\"\n" + "      }";
    InputRowParser parser = mapper.readValue(parserString, InputRowParser.class);
    InputRowParser expected = new OrcHadoopInputRowParser(
            new TimeAndDimsParseSpec(
                    new TimestampSpec("timestamp", "auto", null),
                    new DimensionsSpec(
                            ImmutableList.<DimensionSchema>of(
                                    new StringDimensionSchema("col1"),
                                    new StringDimensionSchema("col2")),
                            null,
                            null)),
            "struct<timestamp:string,col1:string,col2:array<string>,val1:float>");
    Assert.assertEquals(expected, parser);
}
Also used : TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), InputRowParser (io.druid.data.input.impl.InputRowParser), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), Test (org.junit.Test)
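
The dimensionsSpec fragment of that JSON can also be looked at in isolation. A hedged sketch of deserializing just that fragment, assuming an ObjectMapper configured the way the test's mapper is (a bare Jackson mapper may or may not resolve plain dimension names the same way):

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.data.input.impl.DimensionsSpec;

import java.io.IOException;

public class DimensionsSpecJsonSketch {
    // The "dimensionsSpec" block from the parser JSON above, on its own. Plain names in
    // "dimensions" are expected to become default (string) dimension schemas, matching the
    // StringDimensionSchema instances in the expected parser.
    public static DimensionsSpec fromJson(ObjectMapper druidConfiguredMapper) throws IOException {
        String json = "{\n"
                + "  \"dimensions\": [\"col1\", \"col2\"],\n"
                + "  \"dimensionExclusions\": [],\n"
                + "  \"spatialDimensions\": []\n"
                + "}";
        return druidConfiguredMapper.readValue(json, DimensionsSpec.class);
    }
}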

Example 39 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class GroupByQueryRunnerFactoryTest, method createSegment.

private Segment createSegment() throws Exception {
    IncrementalIndex incrementalIndex = new OnheapIncrementalIndex(
            0,
            Granularities.NONE,
            new AggregatorFactory[] { new CountAggregatorFactory("count") },
            true,
            true,
            true,
            5000);
    // The dimensions are declared by name only, via getDefaultSchemas, inside a CSVParseSpec.
    StringInputRowParser parser = new StringInputRowParser(
            new CSVParseSpec(
                    new TimestampSpec("timestamp", "iso", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null),
                    "\t",
                    ImmutableList.of("timestamp", "product", "tags")),
            "UTF-8");
    String[] rows = new String[] { "2011-01-12T00:00:00.000Z,product_1,t1", "2011-01-13T00:00:00.000Z,product_2,t2", "2011-01-14T00:00:00.000Z,product_3,t2" };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    closerRule.closeLater(incrementalIndex);
    return new IncrementalIndexSegment(incrementalIndex, "test");
}
Also used : CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex), IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), CSVParseSpec (io.druid.data.input.impl.CSVParseSpec), StringInputRowParser (io.druid.data.input.impl.StringInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)
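
A small hedged sketch of what that parser does with one of the test rows, assuming the same io.druid API; the expected output in the comments is inferred from how the spec is declared, not something the test itself asserts.

import com.google.common.collect.ImmutableList;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.CSVParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimestampSpec;

public class CsvDimensionsSketch {
    public static void main(String[] args) {
        StringInputRowParser parser = new StringInputRowParser(
                new CSVParseSpec(
                        new TimestampSpec("timestamp", "iso", null),
                        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null),
                        "\t",
                        ImmutableList.of("timestamp", "product", "tags")),
                "UTF-8");
        InputRow row = parser.parse("2011-01-12T00:00:00.000Z,product_1,t1");
        // The parsed row's dimensions should come from the DimensionsSpec: expected
        // [product, tags], with the "timestamp" column consumed by the TimestampSpec.
        System.out.println(row.getDimensions());
        System.out.println(row.getDimension("product"));
    }
}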

Example 40 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class HadoopConverterJobTest, method setUp.

@Before
public void setUp() throws Exception {
    final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec = new MetadataStorageUpdaterJobSpec() {

        @Override
        public String getSegmentTable() {
            return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
        }

        @Override
        public MetadataStorageConnectorConfig get() {
            return derbyConnectorRule.getMetadataConnectorConfig();
        }
    };
    final File scratchFileDir = temporaryFolder.newFolder();
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());
    final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File tmpInputFile = temporaryFolder.newFile();
    FileUtils.retryCopy(new ByteSource() {

        @Override
        public InputStream openStream() throws IOException {
            return url.openStream();
        }
    }, tmpInputFile, FileUtils.IS_EXCEPTION, 3);
    final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(
            new HadoopIngestionSpec(
                    new DataSchema(
                            DATASOURCE,
                            HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                                    new StringInputRowParser(
                                            new DelimitedParseSpec(
                                                    new TimestampSpec("ts", "iso", null),
                                                    new DimensionsSpec(
                                                            DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)),
                                                            null,
                                                            null),
                                                    "\t",
                                                    "",
                                                    Arrays.asList(TestIndex.COLUMNS)),
                                            null),
                                    Map.class),
                            new AggregatorFactory[] {
                                    new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
                                    new HyperUniquesAggregatorFactory("quality_uniques", "quality")
                            },
                            new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)),
                            HadoopDruidIndexerConfig.JSON_MAPPER),
                    new HadoopIOConfig(
                            ImmutableMap.<String, Object>of("type", "static", "paths", tmpInputFile.getAbsolutePath()),
                            metadataStorageUpdaterJobSpec,
                            tmpSegmentDir.getAbsolutePath()),
                    new HadoopTuningConfig(
                            scratchFileDir.getAbsolutePath(),
                            null, null, null, null, null,
                            false, false, false, false,
                            null, false, false,
                            null, null, null,
                            false, false)));
    metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
    connector = derbyConnectorRule.getConnector();
    try {
        connector.getDBI().withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                handle.execute("DROP TABLE druid_segments");
                return null;
            }
        });
    } catch (CallbackFailedException e) {
    // Who cares
    }
    List<Jobby> jobs = ImmutableList.of(new Jobby() {

        @Override
        public boolean run() {
            connector.createSegmentTable(metadataStorageUpdaterJobSpec.getSegmentTable());
            return true;
        }
    }, new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig), new HadoopDruidIndexerJob(hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector)));
    JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
}
Also used : HadoopIngestionSpec (io.druid.indexer.HadoopIngestionSpec), HadoopTuningConfig (io.druid.indexer.HadoopTuningConfig), URL (java.net.URL), HadoopIOConfig (io.druid.indexer.HadoopIOConfig), UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec), TimestampSpec (io.druid.data.input.impl.TimestampSpec), SQLMetadataStorageUpdaterJobHandler (io.druid.indexer.SQLMetadataStorageUpdaterJobHandler), DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory), InputStream (java.io.InputStream), DelimitedParseSpec (io.druid.data.input.impl.DelimitedParseSpec), IOException (java.io.IOException), HadoopDruidIndexerConfig (io.druid.indexer.HadoopDruidIndexerConfig), CallbackFailedException (org.skife.jdbi.v2.exceptions.CallbackFailedException), Handle (org.skife.jdbi.v2.Handle), DataSchema (io.druid.segment.indexing.DataSchema), Jobby (io.druid.indexer.Jobby), HadoopDruidIndexerJob (io.druid.indexer.HadoopDruidIndexerJob), StringInputRowParser (io.druid.data.input.impl.StringInputRowParser), HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), ByteSource (com.google.common.io.ByteSource), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), File (java.io.File), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), HadoopDruidDetermineConfigurationJob (io.druid.indexer.HadoopDruidDetermineConfigurationJob), Interval (org.joda.time.Interval), Before (org.junit.Before)
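
The part of this setup that involves DimensionsSpec is the DelimitedParseSpec inside the StringInputRowParser. A pared-down sketch of that wiring, with hypothetical column names standing in for the TestIndex.DIMENSIONS and TestIndex.COLUMNS constants used above:

import io.druid.data.input.impl.DelimitedParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimestampSpec;

import java.util.Arrays;

public class TsvParserSketch {
    // "dim_a" and "dim_b" are placeholders for the real TestIndex dimensions, and the
    // column list stands in for TestIndex.COLUMNS; only the shape of the wiring matters here.
    public static StringInputRowParser tsvParser() {
        return new StringInputRowParser(
                new DelimitedParseSpec(
                        new TimestampSpec("ts", "iso", null),
                        new DimensionsSpec(
                                DimensionsSpec.getDefaultSchemas(Arrays.asList("dim_a", "dim_b")),
                                null,
                                null),
                        "\t",                                   // field delimiter
                        "",                                     // list delimiter
                        Arrays.asList("ts", "dim_a", "dim_b", "value")),
                null);                                          // default charset
    }
}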

Aggregations

DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 41 usages
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 29 usages
JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 16 usages
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 16 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 16 usages
Test (org.junit.Test): 14 usages
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 13 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 11 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 10 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 10 usages
DateTime (org.joda.time.DateTime): 9 usages
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 8 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 8 usages
DataSchema (io.druid.segment.indexing.DataSchema): 8 usages
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 8 usages
File (java.io.File): 8 usages
Map (java.util.Map): 8 usages
SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema): 7 usages
CSVParseSpec (io.druid.data.input.impl.CSVParseSpec): 6 usages
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 6 usages