Examples with StringInputRowParser - io.druid.data.input.impl.StringInputRowParser

Example 6 with StringInputRowParser

use of io.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

the class DruidJsonValidator method run.

@Override
public void run() {
    File file = new File(jsonFile);
    if (!file.exists()) {
        System.out.printf("File[%s] does not exist.%n", file);
    }
    final Injector injector = makeInjector();
    final ObjectMapper jsonMapper = injector.getInstance(ObjectMapper.class);
    registerModules(jsonMapper, Iterables.concat(Initialization.getFromExtensions(injector.getInstance(ExtensionsConfig.class), DruidModule.class), Arrays.asList(new FirehoseModule(), new IndexingHadoopModule(), new IndexingServiceFirehoseModule(), new LocalDataStorageDruidModule(), new ParsersModule())));
    final ClassLoader loader;
    if (Thread.currentThread().getContextClassLoader() != null) {
        loader = Thread.currentThread().getContextClassLoader();
    } else {
        loader = DruidJsonValidator.class.getClassLoader();
    }
    if (toLogger) {
        logWriter = new NullWriter() {

            private final Logger logger = new Logger(DruidJsonValidator.class);

            @Override
            public void write(char[] cbuf, int off, int len) {
                logger.info(new String(cbuf, off, len));
            }
        };
    }
    try {
        if (type.equalsIgnoreCase("query")) {
            jsonMapper.readValue(file, Query.class);
        } else if (type.equalsIgnoreCase("hadoopConfig")) {
            jsonMapper.readValue(file, HadoopDruidIndexerConfig.class);
        } else if (type.equalsIgnoreCase("task")) {
            jsonMapper.readValue(file, Task.class);
        } else if (type.equalsIgnoreCase("parse")) {
            final StringInputRowParser parser;
            if (file.isFile()) {
                logWriter.write("loading parse spec from file '" + file + "'");
                parser = jsonMapper.readValue(file, StringInputRowParser.class);
            } else if (loader.getResource(jsonFile) != null) {
                logWriter.write("loading parse spec from resource '" + jsonFile + "'");
                parser = jsonMapper.readValue(loader.getResource(jsonFile), StringInputRowParser.class);
            } else {
                logWriter.write("cannot find proper spec from 'file'.. regarding it as a json spec");
                parser = jsonMapper.readValue(jsonFile, StringInputRowParser.class);
            }
            if (resource != null) {
                final CharSource source;
                if (new File(resource).isFile()) {
                    logWriter.write("loading data from file '" + resource + "'");
                    source = Resources.asByteSource(new File(resource).toURL()).asCharSource(Charset.forName(parser.getEncoding()));
                } else if (loader.getResource(resource) != null) {
                    logWriter.write("loading data from resource '" + resource + "'");
                    source = Resources.asByteSource(loader.getResource(resource)).asCharSource(Charset.forName(parser.getEncoding()));
                } else {
                    logWriter.write("cannot find proper data from 'resource'.. regarding it as data string");
                    source = CharSource.wrap(resource);
                }
                readData(parser, source);
            }
        } else {
            throw new UOE("Unknown type[%s]", type);
        }
    } catch (Exception e) {
        System.out.println("INVALID JSON!");
        throw Throwables.propagate(e);
    }
}

Also used : CharSource(com.google.common.io.CharSource) IndexingServiceFirehoseModule(io.druid.guice.IndexingServiceFirehoseModule) IndexingHadoopModule(io.druid.indexer.IndexingHadoopModule) LocalDataStorageDruidModule(io.druid.guice.LocalDataStorageDruidModule) UOE(io.druid.java.util.common.UOE) Logger(io.druid.java.util.common.logger.Logger) HadoopDruidIndexerConfig(io.druid.indexer.HadoopDruidIndexerConfig) NullWriter(org.apache.commons.io.output.NullWriter) IOException(java.io.IOException) FirehoseModule(io.druid.guice.FirehoseModule) IndexingServiceFirehoseModule(io.druid.guice.IndexingServiceFirehoseModule) Injector(com.google.inject.Injector) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) ParsersModule(io.druid.guice.ParsersModule) ExtensionsConfig(io.druid.guice.ExtensionsConfig) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 7 with StringInputRowParser

use of io.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

the class KafkaSupervisorTest method getDataSchema.

private DataSchema getDataSchema(String dataSource) {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(dataSource, objectMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions, null, null), new JSONPathSpec(true, ImmutableList.<JSONPathFieldSpec>of()), ImmutableMap.<String, Boolean>of()), Charsets.UTF_8.name()), Map.class), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.<Interval>of()), objectMapper);
}

Also used : ArrayList(java.util.ArrayList) EasyMock.anyString(org.easymock.EasyMock.anyString) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) JSONPathSpec(io.druid.data.input.impl.JSONPathSpec) JSONParseSpec(io.druid.data.input.impl.JSONParseSpec) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Interval(org.joda.time.Interval)

Example 8 with StringInputRowParser

use of io.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

the class MapVirtualColumnTest method constructorFeeder.

@Parameterized.Parameters
public static Iterable<Object[]> constructorFeeder() throws IOException {
    final Supplier<SelectQueryConfig> selectConfigSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));
    SelectQueryRunnerFactory factory = new SelectQueryRunnerFactory(new SelectQueryQueryToolChest(new DefaultObjectMapper(), QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(), selectConfigSupplier), new SelectQueryEngine(selectConfigSupplier), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis()).withQueryGranularity(Granularities.NONE).build();
    final IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 10000);
    final StringInputRowParser parser = new StringInputRowParser(new DelimitedParseSpec(new TimestampSpec("ts", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values")), null, null), "\t", ",", Arrays.asList("ts", "dim", "keys", "values")), "utf8");
    CharSource input = CharSource.wrap("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3\n" + "2011-01-12T00:00:00.000Z\tb\tkey4,key5,key6\tvalue4\n" + "2011-01-12T00:00:00.000Z\tc\tkey1,key5\tvalue1,value5,value9\n");
    IncrementalIndex index1 = TestIndex.loadIncrementalIndex(index, input, parser);
    QueryableIndex index2 = TestIndex.persistRealtimeAndLoadMMapped(index1);
    return transformToConstructionFeeder(Arrays.asList(makeQueryRunner(factory, "index1", new IncrementalIndexSegment(index1, "index1"), "incremental"), makeQueryRunner(factory, "index2", new QueryableIndexSegment("index2", index2), "queryable")));
}

Also used : CharSource(com.google.common.io.CharSource) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) DelimitedParseSpec(io.druid.data.input.impl.DelimitedParseSpec) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) SelectQueryRunnerFactory(io.druid.query.select.SelectQueryRunnerFactory) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) DateTime(org.joda.time.DateTime) SelectQueryQueryToolChest(io.druid.query.select.SelectQueryQueryToolChest) SelectQueryEngine(io.druid.query.select.SelectQueryEngine) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema)

Example 9 with StringInputRowParser

use of io.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

the class IndexGeneratorCombinerTest method setUp.

@Before
public void setUp() throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(new DataSchema("website", HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(new StringInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")), null, null)), null), Map.class), new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited"), new HyperUniquesAggregatorFactory("unique_hosts", "host") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Interval.parse("2010/2011"))), HadoopDruidIndexerConfig.JSON_MAPPER), new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"), HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")));
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
    EasyMock.replay(context);
    aggregators = config.getSchema().getDataSchema().getAggregators();
    combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
    combiner.setup(context);
}

Also used : Configuration(org.apache.hadoop.conf.Configuration) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSchema(io.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) Reducer(org.apache.hadoop.mapreduce.Reducer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Before(org.junit.Before)

Example 10 with StringInputRowParser

use of io.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

the class IndexGeneratorJobTest method constructFeed.

@Parameterized.Parameters(name = "useCombiner={0}, partitionType={1}, interval={2}, shardInfoForEachSegment={3}, " + "data={4}, inputFormatName={5}, inputRowParser={6}, maxRowsInMemory={7}, " + "aggs={8}, datasourceName={9}, forceExtendableShardSpecs={10}, buildV9Directly={11}")
public static Collection<Object[]> constructFeed() {
    final List<Object[]> baseConstructors = Arrays.asList(new Object[][] { { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" }, { false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" }, { true, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102201,b.exmaple.com,50", "2014102202,c.example.com,200", "2014102203,d.example.com,250", "2014102204,e.example.com,123", "2014102205,f.example.com,567", "2014102206,g.example.com,11", "2014102207,h.example.com,251", "2014102208,i.example.com,963", "2014102209,j.example.com,333", "2014102210,k.example.com,253", "2014102211,l.example.com,321", "2014102212,m.example.com,3125", "2014102213,n.example.com,234", "2014102214,o.example.com,325", "2014102215,p.example.com,3533", "2014102216,q.example.com,500", "2014102216,q.example.com,87"), null, new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), null, aggs1, "website" }, { false, "single", "2014-10-22T00:00:00Z/P2D", new String[][][] { { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } }, { { null, "c.example.com" }, { "c.example.com", "e.example.com" }, { "e.example.com", "g.example.com" }, { "g.example.com", "i.example.com" }, { "i.example.com", null } } }, ImmutableList.of("2014102200,a.example.com,100", "2014102200,b.exmaple.com,50", "2014102200,c.example.com,200", "2014102200,d.example.com,250", "2014102200,e.example.com,123", "2014102200,f.example.com,567", "2014102200,g.example.com,11", "2014102200,h.example.com,251", "2014102200,i.example.com,963", "2014102200,j.example.com,333", "2014102300,a.example.com,100", "2014102300,b.exmaple.com,50", "2014102300,c.example.com,200", "2014102300,d.example.com,250", "2014102300,e.example.com,123", "2014102300,f.example.com,567", "2014102300,g.example.com,11", "2014102300,h.example.com,251", "2014102300,i.example.com,963", "2014102300,j.example.com,333"), SequenceFileInputFormat.class.getName(), new HadoopyStringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num"))), null, aggs1, "website" }, { // Tests that new indexes inherit the dimension order from previous index
    false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { // use a single partition, dimension order inheritance is not supported across partitions
    { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(null, null, null), null, null), null), // force 1 row max per index for easier testing
    1, aggs2, "inherit_dims" }, { // Tests that pre-specified dim order is maintained across indexes.
    false, "hashed", "2014-10-22T00:00:00Z/P1D", new Integer[][][] { { { 0, 1 } } }, ImmutableList.of("{\"ts\":\"2014102200\", \"X\":\"x.example.com\"}", "{\"ts\":\"2014102201\", \"Y\":\"y.example.com\"}", "{\"ts\":\"2014102202\", \"M\":\"m.example.com\"}", "{\"ts\":\"2014102203\", \"Q\":\"q.example.com\"}", "{\"ts\":\"2014102204\", \"B\":\"b.example.com\"}", "{\"ts\":\"2014102205\", \"F\":\"f.example.com\"}"), null, new StringInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("B", "F", "M", "Q", "X", "Y")), null, null), null, null), null), // force 1 row max per index for easier testing
    1, aggs2, "inherit_dims2" } });
    // Run each baseConstructor with/without buildV9Directly and forceExtendableShardSpecs.
    final List<Object[]> constructors = Lists.newArrayList();
    for (Object[] baseConstructor : baseConstructors) {
        for (int buildV9Directly = 0; buildV9Directly < 2; buildV9Directly++) {
            for (int forceExtendableShardSpecs = 0; forceExtendableShardSpecs < 2; forceExtendableShardSpecs++) {
                final Object[] fullConstructor = new Object[baseConstructor.length + 2];
                System.arraycopy(baseConstructor, 0, fullConstructor, 0, baseConstructor.length);
                fullConstructor[baseConstructor.length] = forceExtendableShardSpecs == 0;
                fullConstructor[baseConstructor.length + 1] = buildV9Directly == 0;
                constructors.add(fullConstructor);
            }
        }
    }
    return constructors;
}

Also used : CSVParseSpec(io.druid.data.input.impl.CSVParseSpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) JSONParseSpec(io.druid.data.input.impl.JSONParseSpec)

Aggregations

StringInputRowParser (io.druid.data.input.impl.StringInputRowParser)19 DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)16 TimestampSpec (io.druid.data.input.impl.TimestampSpec)16 JSONParseSpec (io.druid.data.input.impl.JSONParseSpec)8 Map (java.util.Map)8 DataSchema (io.druid.segment.indexing.DataSchema)7 UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec)7 CSVParseSpec (io.druid.data.input.impl.CSVParseSpec)6 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)5 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)5 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)5 Test (org.junit.Test)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)4 OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)4 ArbitraryGranularitySpec (io.druid.segment.indexing.granularity.ArbitraryGranularitySpec)4 Before (org.junit.Before)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper)3 HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)3