Example 6 with JSONParseSpec

Use of io.druid.data.input.impl.JSONParseSpec in project druid by druid-io.

From class KafkaSupervisorTest, method getDataSchema:

private DataSchema getDataSchema(String dataSource) {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(
        dataSource,
        objectMapper.convertValue(
            new StringInputRowParser(
                new JSONParseSpec(
                    new TimestampSpec("timestamp", "iso", null),
                    new DimensionsSpec(dimensions, null, null),
                    new JSONPathSpec(true, ImmutableList.<JSONPathFieldSpec>of()),
                    ImmutableMap.<String, Boolean>of()
                ),
                Charsets.UTF_8.name()
            ),
            Map.class
        ),
        new AggregatorFactory[] { new CountAggregatorFactory("rows") },
        new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.<Interval>of()),
        objectMapper
    );
}
Also used: ArrayList (java.util.ArrayList), EasyMock.anyString (org.easymock.EasyMock.anyString), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), DimensionSchema (io.druid.data.input.impl.DimensionSchema), DataSchema (io.druid.segment.indexing.DataSchema), UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), StringInputRowParser (io.druid.data.input.impl.StringInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), JSONPathSpec (io.druid.data.input.impl.JSONPathSpec), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Interval (org.joda.time.Interval)
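
DataSchema takes its parser as a plain Map rather than a typed object, which is why getDataSchema() runs the StringInputRowParser through objectMapper.convertValue. Below is a minimal sketch of that round trip in isolation; it is illustrative only, not from the test, and assumes a DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper) with Druid's standard Jackson modules registered:

    // Sketch, not from the test: serialize the parser to the Map form
    // DataSchema expects, then materialize it back into a typed parser
    // (roughly what DataSchema.getParser() does internally).
    ObjectMapper objectMapper = new DefaultObjectMapper();
    StringInputRowParser parser = new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(ImmutableList.<DimensionSchema>of(StringDimensionSchema.create("dim1")), null, null),
            new JSONPathSpec(true, ImmutableList.<JSONPathFieldSpec>of()),
            ImmutableMap.<String, Boolean>of()
        ),
        Charsets.UTF_8.name()
    );
    Map<String, Object> parserMap = objectMapper.convertValue(parser, Map.class);
    // Converting the Map back recovers an equivalent parser; the "type" field
    // in the Map drives Jackson's polymorphic deserialization.
    InputRowParser restored = objectMapper.convertValue(parserMap, InputRowParser.class);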

Example 7 with JSONParseSpec

Use of io.druid.data.input.impl.JSONParseSpec in project druid by druid-io.

From class IndexGeneratorJobTest, method constructFeed:

@Parameterized.Parameters(name = "useCombiner={0}, partitionType={1}, interval={2}, shardInfoForEachSegment={3}, " + "data={4}, inputFormatName={5}, inputRowParser={6}, maxRowsInMemory={7}, " + "aggs={8}, datasourceName={9}, forceExtendableShardSpecs={10}, buildV9Directly={11}")
public static Collection<Object[]> constructFeed() {
    final List<Object[]> baseConstructors = Arrays.asList(new Object[][] {
        // Case 1: no combiner, "single" (range) partitioning, two days of CSV data via StringInputRowParser.
        { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" },
        // Case 2: no combiner, hashed partitioning, one day of CSV data via HadoopyStringInputRowParser.
        { false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" },
        // Case 3: same data as case 2, but with the combiner enabled and a StringInputRowParser.
        { true, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" },
        // Case 4: same data as case 1, read as SequenceFile input via HadoopyStringInputRowParser.
        { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), SequenceFileInputFormat.class.getName(), new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" },
        // Case 5: tests that new indexes inherit the dimension order from the previous index;
        // a single partition is used because dimension order inheritance is not supported across
        // partitions, and maxRowsInMemory is 1 to force one row per index for easier testing.
        { false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(null, null, null), null, null), null), 1, aggs2, "inherit_dims" },
        // Case 6: tests that a pre-specified dimension order is maintained across indexes;
        // maxRowsInMemory is again 1 to force one row per index for easier testing.
        { false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("B", "F", "M", "Q", "X", "Y")), null, null), null, null), null), 1, aggs2, "inherit_dims2" }
    });
    // Run each baseConstructor with/without buildV9Directly and forceExtendableShardSpecs.
    final List<Object[]> constructors = Lists.newArrayList();
    for (Object[] baseConstructor : baseConstructors) {
        for (int buildV9Directly = 0; buildV9Directly < 2; buildV9Directly++) {
            for (int forceExtendableShardSpecs = 0; forceExtendableShardSpecs < 2; forceExtendableShardSpecs++) {
                final Object[] fullConstructor = new Object[baseConstructor.length + 2];
                System.arraycopy(baseConstructor, 0, fullConstructor, 0, baseConstructor.length);
                fullConstructor[baseConstructor.length] = forceExtendableShardSpecs == 0;
                fullConstructor[baseConstructor.length + 1] = buildV9Directly == 0;
                constructors.add(fullConstructor);
            }
        }
    }
    return constructors;
}
Also used: CSVParseSpec (io.druid.data.input.impl.CSVParseSpec), StringInputRowParser (io.druid.data.input.impl.StringInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec)
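
Note the two JSONParseSpec variants in the JSON cases above: the "inherit_dims" case passes a DimensionsSpec with a null dimension list, making the parser schemaless so dimensions are discovered from each row, while "inherit_dims2" pins the dimension order up front. A minimal sketch of the schemaless behavior in isolation (illustrative only, not part of the test):

    // Sketch, not from the test: with a null dimension list the spec is
    // schemaless, so every non-timestamp field in the JSON becomes a dimension.
    StringInputRowParser parser = new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("ts", "yyyyMMddHH", null),
            new DimensionsSpec(null, null, null), // schemaless: discover dimensions per row
            null, // no flattenSpec: only top-level JSON fields are read
            null  // no feature spec
        ),
        null
    );
    InputRow row = parser.parse("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}");
    // row.getDimensions() is ["X"]; "ts" is consumed by the TimestampSpec.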

Example 8 with JSONParseSpec

Use of io.druid.data.input.impl.JSONParseSpec in project druid by druid-io.

From class DefaultOfflineAppenderatorFactoryTest, method testBuild:

@Test
public void testBuild() throws IOException, SegmentNotWritableException {
    Injector injector = Initialization.makeInjectorWithModules(GuiceInjectors.makeStartupInjector(), ImmutableList.<Module>of(new Module() {

        @Override
        public void configure(Binder binder) {
            binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/tool");
            binder.bindConstant().annotatedWith(Names.named("servicePort")).to(9999);
            binder.bind(DruidProcessingConfig.class).toInstance(new DruidProcessingConfig() {

                @Override
                public String getFormatString() {
                    return "processing-%s";
                }

                @Override
                public int intermediateComputeSizeBytes() {
                    return 100 * 1024 * 1024;
                }

                @Override
                public int getNumThreads() {
                    return 1;
                }

                @Override
                public int columnCacheSizeBytes() {
                    return 25 * 1024 * 1024;
                }
            });
            binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);
        }
    }));
    ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class);
    AppenderatorFactory defaultOfflineAppenderatorFactory = objectMapper.reader(AppenderatorFactory.class).readValue("{\"type\":\"offline\"}");
    final Map<String, Object> parserMap = objectMapper.convertValue(
        new MapInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("ts", "auto", null),
                new DimensionsSpec(null, null, null),
                null,
                null
            )
        ),
        Map.class
    );
    DataSchema schema = new DataSchema(
        "dataSourceName",
        parserMap,
        new AggregatorFactory[] { new CountAggregatorFactory("count"), new LongSumAggregatorFactory("met", "met") },
        new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null),
        objectMapper
    );
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(75000, null, null, temporaryFolder.newFolder(), null, null, null, null, null, null, 0, 0, null, null);
    try (Appenderator appenderator = defaultOfflineAppenderatorFactory.build(schema, tuningConfig, new FireDepartmentMetrics())) {
        Assert.assertEquals("dataSourceName", appenderator.getDataSource());
        Assert.assertEquals(null, appenderator.startJob());
        SegmentIdentifier identifier = new SegmentIdentifier("dataSourceName", new Interval("2000/2001"), "A", new LinearShardSpec(0));
        Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.add(identifier, AppenderatorTest.IR("2000", "bar", 1), Suppliers.ofInstance(Committers.nil()));
        Assert.assertEquals(1, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.add(identifier, AppenderatorTest.IR("2000", "baz", 1), Suppliers.ofInstance(Committers.nil()));
        Assert.assertEquals(2, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.close();
        Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
    }
}
Also used: ColumnConfig (io.druid.segment.column.ColumnConfig), MapInputRowParser (io.druid.data.input.impl.MapInputRowParser), LinearShardSpec (io.druid.timeline.partition.LinearShardSpec), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), Binder (com.google.inject.Binder), UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec), Injector (com.google.inject.Injector), TimestampSpec (io.druid.data.input.impl.TimestampSpec), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig), DataSchema (io.druid.segment.indexing.DataSchema), FireDepartmentMetrics (io.druid.segment.realtime.FireDepartmentMetrics), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), Module (com.google.inject.Module), DruidProcessingConfig (io.druid.query.DruidProcessingConfig), Interval (org.joda.time.Interval), Test (org.junit.Test)
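
As in Example 6, the parser reaches DataSchema as a Map (parserMap here), which is simply the parser's JSON form. A minimal sketch of what that form looks like and how the typed parser is recovered; the JSON shape shown is an assumption for orientation and may vary by Druid version:

    // Approximate JSON shape of parserMap (an assumption, for orientation only):
    // {
    //   "type": "map",
    //   "parseSpec": {
    //     "format": "json",
    //     "timestampSpec": { "column": "ts", "format": "auto" },
    //     "dimensionsSpec": { "dimensions": [], "dimensionExclusions": [], "spatialDimensions": [] }
    //   }
    // }
    // DataSchema materializes the typed parser back on demand:
    InputRowParser parser = schema.getParser();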

Example 9 with JSONParseSpec

Use of io.druid.data.input.impl.JSONParseSpec in project druid by druid-io.

From class EventReceiverFirehoseTest, method setUp:

@Before
public void setUp() throws Exception {
    req = EasyMock.createMock(HttpServletRequest.class);
    eventReceiverFirehoseFactory = new EventReceiverFirehoseFactory(SERVICE_NAME, CAPACITY, null, new DefaultObjectMapper(), new DefaultObjectMapper(), register);
    firehose = (EventReceiverFirehoseFactory.EventReceiverFirehose) eventReceiverFirehoseFactory.connect(
        new MapInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("timestamp", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1")), null, null),
                null,
                null
            )
        )
    );
}
Also used: HttpServletRequest (javax.servlet.http.HttpServletRequest), MapInputRowParser (io.druid.data.input.impl.MapInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec), Before (org.junit.Before)
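
The parser handed to connect() is what turns each event Map posted to the firehose into an InputRow. A standalone sketch of the same parser at work (hypothetical event values, not part of the test):

    // Sketch, not from the test: the connected MapInputRowParser maps one event to one row.
    MapInputRowParser parser = new MapInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("timestamp", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1")), null, null),
            null,
            null
        )
    );
    InputRow row = parser.parse(ImmutableMap.<String, Object>of("timestamp", "2014-10-22T00:00:00Z", "d1", "some-value"));
    // row.getDimension("d1") is ["some-value"].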

Example 10 with JSONParseSpec

Use of io.druid.data.input.impl.JSONParseSpec in project druid by druid-io.

From class EventReceiverFirehoseTest, method testDuplicateRegistering:

@Test(expected = ISE.class)
public void testDuplicateRegistering() throws IOException {
    EventReceiverFirehoseFactory eventReceiverFirehoseFactory2 = new EventReceiverFirehoseFactory(SERVICE_NAME, CAPACITY, null, new DefaultObjectMapper(), new DefaultObjectMapper(), register);
    EventReceiverFirehoseFactory.EventReceiverFirehose firehose2 = (EventReceiverFirehoseFactory.EventReceiverFirehose) eventReceiverFirehoseFactory2.connect(
        new MapInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("timestamp", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1")), null, null),
                null,
                null
            )
        )
    );
}
Also used: MapInputRowParser (io.druid.data.input.impl.MapInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec), Test (org.junit.Test)

Aggregations

DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 16
JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 16
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 16
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 8
Test (org.junit.Test): 8
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 6
ArrayList (java.util.ArrayList): 5
JSONPathSpec (io.druid.data.input.impl.JSONPathSpec): 4
MapInputRowParser (io.druid.data.input.impl.MapInputRowParser): 4
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 4
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 4
DataSchema (io.druid.segment.indexing.DataSchema): 4
ArbitraryGranularitySpec (io.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 4
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 4
Map (java.util.Map): 4
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3
JSONPathFieldSpec (io.druid.data.input.impl.JSONPathFieldSpec): 3
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3
Interval (org.joda.time.Interval): 3
Binder (com.google.inject.Binder): 2