Example 16 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IndexMergerTest, method testMergeWithDimensionsList.

@Test
public void testMergeWithDimensionsList() throws Exception {
    IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withDimensionsSpec(new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dimA", "dimB", "dimC")), null, null))
            .withMinTimestamp(0L)
            .withQueryGranularity(Granularities.NONE)
            .withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("count") })
            .build();
    IncrementalIndex toPersist1 = new OnheapIncrementalIndex(schema, true, 1000);
    IncrementalIndex toPersist2 = new OnheapIncrementalIndex(schema, true, 1000);
    IncrementalIndex toPersist3 = new OnheapIncrementalIndex(schema, true, 1000);
    addDimValuesToIndex(toPersist1, "dimA", Arrays.asList("1", "2"));
    addDimValuesToIndex(toPersist2, "dimA", Arrays.asList("1", "2"));
    addDimValuesToIndex(toPersist3, "dimC", Arrays.asList("1", "2"));
    final File tmpDir = temporaryFolder.newFolder();
    final File tmpDir2 = temporaryFolder.newFolder();
    final File tmpDir3 = temporaryFolder.newFolder();
    final File tmpDirMerged = temporaryFolder.newFolder();
    QueryableIndex index1 = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersist1, tmpDir, indexSpec)));
    QueryableIndex index2 = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersist2, tmpDir2, indexSpec)));
    QueryableIndex index3 = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersist3, tmpDir3, indexSpec)));
    final QueryableIndex merged = closer.closeLater(
            INDEX_IO.loadIndex(
                    INDEX_MERGER.mergeQueryableIndex(
                            Arrays.asList(index1, index2, index3),
                            true,
                            new AggregatorFactory[] { new CountAggregatorFactory("count") },
                            tmpDirMerged,
                            indexSpec
                    )
            )
    );
    final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
    final List<Rowboat> boatList = ImmutableList.copyOf(adapter.getRows());
    Assert.assertEquals(ImmutableList.of("dimA", "dimC"), ImmutableList.copyOf(adapter.getDimensionNames()));
    Assert.assertEquals(4, boatList.size());
    Assert.assertArrayEquals(new int[][] { { 0 }, { 1 } }, boatList.get(0).getDims());
    Assert.assertArrayEquals(new Object[] { 1L }, boatList.get(0).getMetrics());
    Assert.assertArrayEquals(new int[][] { { 0 }, { 2 } }, boatList.get(1).getDims());
    Assert.assertArrayEquals(new Object[] { 1L }, boatList.get(1).getMetrics());
    Assert.assertArrayEquals(new int[][] { { 1 }, { 0 } }, boatList.get(2).getDims());
    Assert.assertArrayEquals(new Object[] { 2L }, boatList.get(2).getMetrics());
    Assert.assertArrayEquals(new int[][] { { 2 }, { 0 } }, boatList.get(3).getDims());
    Assert.assertArrayEquals(new Object[] { 2L }, boatList.get(3).getMetrics());
    checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dimA", ""));
    checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("dimA", "1"));
    checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("dimA", "2"));
    checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("dimB", ""));
    checkBitmapIndex(Lists.newArrayList(2, 3), adapter.getBitmapIndex("dimC", ""));
    checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dimC", "1"));
    checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("dimC", "2"));
}
Also used: IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) File(java.io.File) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) IncrementalIndexTest(io.druid.segment.data.IncrementalIndexTest) Test(org.junit.Test)
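
For context, the DimensionsSpec above declares dimA, dimB and dimC up front, yet the merged segment only reports dimA and dimC because no row ever carries a value for dimB. A minimal sketch of the same spec built in isolation (the class name and the printed value are illustrative, not part of the test):

import io.druid.data.input.impl.DimensionsSpec;
import java.util.Arrays;

class ExplicitDimensionsSpecSketch {
    public static void main(String[] args) {
        // Explicit dimension list, no exclusions, no spatial dimensions.
        DimensionsSpec spec = new DimensionsSpec(
            DimensionsSpec.getDefaultSchemas(Arrays.asList("dimA", "dimB", "dimC")),
            null,   // dimensionExclusions
            null    // spatialDimensions
        );
        // The spec itself still lists all three dimensions; empty columns such as
        // dimB are only dropped when segments are persisted and merged.
        System.out.println(spec.getDimensions().size()); // 3
    }
}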

Example 17 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IndexMergerTest, method persistAndLoad.

private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException {
    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));
    for (InputRow row : rows) {
        toPersist.add(row);
    }
    final File tempDir = temporaryFolder.newFolder();
    return closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersist, tempDir, indexSpec)));
}
Also used: IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) File(java.io.File)
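
A hypothetical caller of this helper, written as if it sat inside IndexMergerTest (the method name and row contents below are illustrative, not taken from the test class):

private QueryableIndex persistTwoRowExample() throws IOException {
    // Two string dimensions in a pinned order.
    List<DimensionSchema> schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dimA", "dimB"));
    InputRow row1 = new MapBasedInputRow(
        0L,
        Arrays.asList("dimA", "dimB"),
        ImmutableMap.<String, Object>of("dimA", "1", "dimB", "2")
    );
    InputRow row2 = new MapBasedInputRow(
        0L,
        Arrays.asList("dimA", "dimB"),
        ImmutableMap.<String, Object>of("dimA", "3", "dimB", "4")
    );
    return persistAndLoad(schema, row1, row2);
}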

Example 18 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IndexMergerV9WithSpatialIndexTest, method makeIncrementalIndex.

private static IncrementalIndex makeIncrementalIndex() throws IOException {
    IncrementalIndex theIndex = new OnheapIncrementalIndex(
            new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(DATA_INTERVAL.getStartMillis())
                    .withQueryGranularity(Granularities.DAY)
                    .withMetrics(METRIC_AGGS)
                    .withDimensionsSpec(new DimensionsSpec(
                            null,
                            null,
                            Arrays.asList(
                                    new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                                    new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))
                            )
                    ))
                    .build(),
            false,
            NUM_POINTS
    );
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-02").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-03").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-04").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
    theIndex.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
    // Add a bunch of random points
    Random rand = new Random();
    for (int i = 8; i < NUM_POINTS; i++) {
        theIndex.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "boo", "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
    }
    return theIndex;
}
Also used: Random(java.util.Random) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime)
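
The DimensionsSpec here is purely spatial: passing null for the dimension list lets string dimensions be discovered from the rows, while the two SpatialDimensionSchema entries assemble dim.geo from lat/long and spatialIsRad from lat2/long2. A minimal sketch of just that spec (the class and method names are illustrative):

import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.SpatialDimensionSchema;
import java.util.Arrays;

class SpatialDimensionsSpecSketch {
    // Mirrors the spec built in makeIncrementalIndex(): no explicit string dimensions,
    // two spatial dimensions assembled from coordinate fields of each input row.
    static DimensionsSpec spatialSpec() {
        return new DimensionsSpec(
            null,  // dimensions: discovered from the input rows
            null,  // dimensionExclusions
            Arrays.asList(
                new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))
            )
        );
    }
}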

Example 19 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IndexGeneratorCombinerTest, method setUp.

@Before
public void setUp() throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
            new HadoopIngestionSpec(
                    new DataSchema(
                            "website",
                            HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                                    new StringInputRowParser(
                                            new TimeAndDimsParseSpec(
                                                    new TimestampSpec("timestamp", "yyyyMMddHH", null),
                                                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")), null, null)
                                            ),
                                            null
                                    ),
                                    Map.class
                            ),
                            new AggregatorFactory[] {
                                    new LongSumAggregatorFactory("visited_sum", "visited"),
                                    new HyperUniquesAggregatorFactory("unique_hosts", "host")
                            },
                            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Interval.parse("2010/2011"))),
                            HadoopDruidIndexerConfig.JSON_MAPPER
                    ),
                    new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"),
                    HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")
            )
    );
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
    EasyMock.replay(context);
    aggregators = config.getSchema().getDataSchema().getAggregators();
    combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
    combiner.setup(context);
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSchema(io.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) Reducer(org.apache.hadoop.mapreduce.Reducer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Before(org.junit.Before)
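
The DimensionsSpec sits deep inside the DataSchema above; a minimal sketch pulling just the row parser out on its own (the class and method names are illustrative):

import com.google.common.collect.ImmutableList;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimeAndDimsParseSpec;
import io.druid.data.input.impl.TimestampSpec;

class WebsiteParserSketch {
    // Parses "timestamp" as yyyyMMddHH and keeps "host" and "keywords" as the only dimensions.
    static StringInputRowParser parser() {
        return new StringInputRowParser(
            new TimeAndDimsParseSpec(
                new TimestampSpec("timestamp", "yyyyMMddHH", null),
                new DimensionsSpec(
                    DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")),
                    null,
                    null
                )
            ),
            null  // encoding, null here as in the test above
        );
    }
}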

Example 20 with DimensionsSpec

Use of io.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

From the class IndexGeneratorJobTest, method constructFeed.

@Parameterized.Parameters(name = "useCombiner={0}, partitionType={1}, interval={2}, shardInfoForEachSegment={3}, " + "data={4}, inputFormatName={5}, inputRowParser={6}, maxRowsInMemory={7}, " + "aggs={8}, datasourceName={9}, forceExtendableShardSpecs={10}, buildV9Directly={11}")
public static Collection<Object[]> constructFeed() {
    final List<Object[]> baseConstructors = Arrays.asList(new Object[][] { { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" }, { false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" }, { true, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" }, { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { 
"e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), SequenceFileInputFormat.class.getName(), new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" }, { // Tests that new indexes inherit the dimension order from previous index
    false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { // use a single partition, dimension order inheritance is not supported across partitions
    { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(null, null, null), null, null), null), // force 1 row max per index for easier testing
    1, aggs2, "inherit_dims" }, { // Tests that pre-specified dim order is maintained across indexes.
    false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("B", "F", "M", "Q", "X", "Y")), null, null), null, null), null), // force 1 row max per index for easier testing
    1, aggs2, "inherit_dims2" } });
    // Run each baseConstructor with/without buildV9Directly and forceExtendableShardSpecs.
    final List<Object[]> constructors = Lists.newArrayList();
    for (Object[] baseConstructor : baseConstructors) {
        for (int buildV9Directly = 0; buildV9Directly < 2; buildV9Directly++) {
            for (int forceExtendableShardSpecs = 0; forceExtendableShardSpecs < 2; forceExtendableShardSpecs++) {
                final Object[] fullConstructor = new Object[baseConstructor.length + 2];
                System.arraycopy(baseConstructor, 0, fullConstructor, 0, baseConstructor.length);
                fullConstructor[baseConstructor.length] = forceExtendableShardSpecs == 0;
                fullConstructor[baseConstructor.length + 1] = buildV9Directly == 0;
                constructors.add(fullConstructor);
            }
        }
    }
    return constructors;
}
Also used: CSVParseSpec(io.druid.data.input.impl.CSVParseSpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) JSONParseSpec(io.druid.data.input.impl.JSONParseSpec)
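
The last two JSON-based cases differ only in their DimensionsSpec, which is exactly what the inherit_dims and inherit_dims2 datasources exercise. A minimal side-by-side sketch (the class and field names are illustrative):

import com.google.common.collect.ImmutableList;
import io.druid.data.input.impl.DimensionsSpec;

class DimensionOrderSketch {
    // inherit_dims case: no declared dimensions, so they are discovered from the data and
    // later indexes inherit the dimension order of earlier ones within a single partition.
    static final DimensionsSpec DISCOVERED = new DimensionsSpec(null, null, null);

    // inherit_dims2 case: the dimension order is pinned up front and kept across all indexes.
    static final DimensionsSpec PINNED = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("B", "F", "M", "Q", "X", "Y")),
        null,
        null
    );
}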

Aggregations

DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 41 usages
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 29 usages
JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 16 usages
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 16 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 16 usages
Test (org.junit.Test): 14 usages
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 13 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 11 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 10 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 10 usages
DateTime (org.joda.time.DateTime): 9 usages
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 8 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 8 usages
DataSchema (io.druid.segment.indexing.DataSchema): 8 usages
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 8 usages
File (java.io.File): 8 usages
Map (java.util.Map): 8 usages
SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema): 7 usages
CSVParseSpec (io.druid.data.input.impl.CSVParseSpec): 6 usages
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 6 usages