Search in sources :

Example 21 with TimestampSpec

use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class HadoopConverterJobTest method setUp.

/**
 * Builds the fixture for the test: redirects the storage system property to a fresh
 * temporary folder, copies the bundled sample TSV into a temporary input file, assembles a
 * Hadoop indexer configuration over that file, and runs the table-creation and indexing jobs.
 *
 * @throws Exception if any temporary file, copy, or job step fails
 */
@Before
public void setUp() throws Exception {
    // Updater spec whose table name and connector config are both served by the Derby rule.
    final MetadataStorageUpdaterJobSpec updaterJobSpec = new MetadataStorageUpdaterJobSpec() {

        @Override
        public String getSegmentTable() {
            return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
        }

        @Override
        public MetadataStorageConnectorConfig get() {
            return derbyConnectorRule.getMetadataConnectorConfig();
        }
    };

    final File workingDir = temporaryFolder.newFolder();

    // Capture the current property value before pointing it at a new temporary segment folder.
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());

    // Copy the classpath sample data into a temporary file that the static path spec can read.
    final URL sampleData = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File inputFile = temporaryFolder.newFile();
    final ByteSource sampleSource = new ByteSource() {

        @Override
        public InputStream openStream() throws IOException {
            return sampleData.openStream();
        }
    };
    FileUtils.retryCopy(sampleSource, inputFile, FileUtils.IS_EXCEPTION, 3);

    // Schema: tab-delimited rows with an ISO "ts" column, one double-sum and one hyperUnique metric.
    final DataSchema dataSchema = new DataSchema(
        DATASOURCE,
        HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
            new StringInputRowParser(
                new DelimitedParseSpec(
                    new TimestampSpec("ts", "iso", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)), null, null),
                    "\t",
                    "",
                    Arrays.asList(TestIndex.COLUMNS)
                ),
                null
            ),
            Map.class
        ),
        new AggregatorFactory[] {
            new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
            new HyperUniquesAggregatorFactory("quality_uniques", "quality")
        },
        new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)),
        HadoopDruidIndexerConfig.JSON_MAPPER
    );

    // IO: read the copied sample file, publish through the Derby-backed updater spec.
    final HadoopIOConfig ioConfig = new HadoopIOConfig(
        ImmutableMap.<String, Object>of("type", "static", "paths", inputFile.getAbsolutePath()),
        updaterJobSpec,
        tmpSegmentDir.getAbsolutePath()
    );

    // Tuning: only the working path is set; every other positional argument is null/false.
    final HadoopTuningConfig tuningConfig = new HadoopTuningConfig(
        workingDir.getAbsolutePath(),
        null, null, null, null, null,
        false, false, false, false,
        null,
        false, false,
        null, null, null,
        false, false
    );

    final HadoopDruidIndexerConfig indexerConfig =
        new HadoopDruidIndexerConfig(new HadoopIngestionSpec(dataSchema, ioConfig, tuningConfig));

    metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
    connector = derbyConnectorRule.getConnector();
    try {
        connector.getDBI().withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                handle.execute("DROP TABLE druid_segments");
                return null;
            }
        });
    } catch (CallbackFailedException ignored) {
        // Deliberately swallowed: the drop is best-effort (presumably the table may not
        // exist yet), and the first job below creates the segment table regardless.
    }

    // First job creates the segment table; the two Hadoop jobs follow it in the list.
    final Jobby createSegmentTableJob = new Jobby() {

        @Override
        public boolean run() {
            connector.createSegmentTable(updaterJobSpec.getSegmentTable());
            return true;
        }
    };
    final List<Jobby> pipeline = ImmutableList.of(
        createSegmentTableJob,
        new HadoopDruidDetermineConfigurationJob(indexerConfig),
        new HadoopDruidIndexerJob(indexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector))
    );
    JobHelper.runJobs(pipeline, indexerConfig);
}
Also used : HadoopIngestionSpec(io.druid.indexer.HadoopIngestionSpec) HadoopTuningConfig(io.druid.indexer.HadoopTuningConfig) URL(java.net.URL) HadoopIOConfig(io.druid.indexer.HadoopIOConfig) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SQLMetadataStorageUpdaterJobHandler(io.druid.indexer.SQLMetadataStorageUpdaterJobHandler) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) InputStream(java.io.InputStream) DelimitedParseSpec(io.druid.data.input.impl.DelimitedParseSpec) IOException(java.io.IOException) HadoopDruidIndexerConfig(io.druid.indexer.HadoopDruidIndexerConfig) IOException(java.io.IOException) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) Handle(org.skife.jdbi.v2.Handle) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) DataSchema(io.druid.segment.indexing.DataSchema) Jobby(io.druid.indexer.Jobby) HadoopDruidIndexerJob(io.druid.indexer.HadoopDruidIndexerJob) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ByteSource(com.google.common.io.ByteSource) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HadoopDruidDetermineConfigurationJob(io.druid.indexer.HadoopDruidDetermineConfigurationJob) Interval(org.joda.time.Interval) Before(org.junit.Before)

Example 22 with TimestampSpec

use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class JobHelperTest method setup.

/**
 * Creates two temporary files and builds the {@code config} used by the tests: a CSV-based
 * "website" schema, a static path spec over {@code dataFile}, and a tuning config that
 * carries a map of job properties.
 *
 * @throws Exception if a temporary file cannot be created or its canonical path resolved
 */
@Before
public void setup() throws Exception {
    // NOTE(review): despite its name, tmpDir is created with newFile(), not newFolder().
    tmpDir = temporaryFolder.newFile();
    dataFile = temporaryFolder.newFile();

    // Schema: CSV rows of (timestamp, host, visited_num) with an hourly "yyyyMMddHH" timestamp.
    final DataSchema schema = new DataSchema(
        "website",
        HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
            new StringInputRowParser(
                new CSVParseSpec(
                    new TimestampSpec("timestamp", "yyyyMMddHH", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null),
                    null,
                    ImmutableList.of("timestamp", "host", "visited_num")
                ),
                null
            ),
            Map.class
        ),
        new AggregatorFactory[] { new LongSumAggregatorFactory("visited_num", "visited_num") },
        new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(this.interval)),
        HadoopDruidIndexerConfig.JSON_MAPPER
    );

    // IO: static path spec over the data file; no metadata updater spec is supplied.
    final HadoopIOConfig ioConfig = new HadoopIOConfig(
        ImmutableMap.<String, Object>of("paths", dataFile.getCanonicalPath(), "type", "static"),
        null,
        tmpDir.getCanonicalPath()
    );

    final HadoopTuningConfig tuningConfig = new HadoopTuningConfig(
        tmpDir.getCanonicalPath(),
        null, null, null, null, null,
        false, false, false, false,
        // Map of job properties
        ImmutableMap.of("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem", "fs.s3.awsAccessKeyId", "THISISMYACCESSKEY"),
        false, false,
        null, null, null,
        false, false
    );

    config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(schema, ioConfig, tuningConfig));
}
Also used : LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) CSVParseSpec(io.druid.data.input.impl.CSVParseSpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) Before(org.junit.Before)

Example 23 with TimestampSpec

use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class ParquetHadoopInputRowParser method parse.

/**
 * Turns a Parquet-read {@link GenericRecord} into an {@link InputRow}.
 *
 * The record is wrapped in a {@link GenericRecordAsMap}; the row timestamp is extracted from
 * that map with the parse spec's {@link TimestampSpec}, and the same map backs the row's event.
 * Modelled on the avro extension's
 * {@link AvroStreamInputRowParser#parseGenericRecord(GenericRecord, ParseSpec, List, boolean, boolean)}.
 *
 * @param record the record to convert
 * @return a {@link MapBasedInputRow} over the wrapped record, using this parser's dimensions
 */
@Override
public InputRow parse(GenericRecord record) {
    final GenericRecordAsMap event = new GenericRecordAsMap(record, false, binaryAsString);
    final DateTime timestamp = parseSpec.getTimestampSpec().extractTimestamp(event);
    return new MapBasedInputRow(timestamp, dimensions, event);
}
Also used : GenericRecordAsMap(io.druid.data.input.avro.GenericRecordAsMap) TimestampSpec(io.druid.data.input.impl.TimestampSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime)

Example 24 with TimestampSpec

use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcHadoopInputRowParserTest method testSerde.

/**
 * Deserializes an "orc" parser from JSON and checks that it equals a programmatically built
 * {@link OrcHadoopInputRowParser} with the same parse spec and type string.
 *
 * @throws IOException if the JSON cannot be read
 */
@Test
public void testSerde() throws IOException {
    // The same ORC type description is embedded in the JSON and passed to the expected parser.
    final String typeString = "struct<timestamp:string,col1:string,col2:array<string>,val1:float>";

    final String json = "{\n"
        + "        \"type\": \"orc\",\n"
        + "        \"parseSpec\": {\n"
        + "          \"format\": \"timeAndDims\",\n"
        + "          \"timestampSpec\": {\n"
        + "            \"column\": \"timestamp\",\n"
        + "            \"format\": \"auto\"\n"
        + "          },\n"
        + "          \"dimensionsSpec\": {\n"
        + "            \"dimensions\": [\n"
        + "              \"col1\",\n"
        + "              \"col2\"\n"
        + "            ],\n"
        + "            \"dimensionExclusions\": [],\n"
        + "            \"spatialDimensions\": []\n"
        + "          }\n"
        + "        },\n"
        + "        \"typeString\": \"" + typeString + "\"\n"
        + "      }";

    final InputRowParser actual = mapper.readValue(json, InputRowParser.class);

    final InputRowParser expected = new OrcHadoopInputRowParser(
        new TimeAndDimsParseSpec(
            new TimestampSpec("timestamp", "auto", null),
            new DimensionsSpec(
                ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")),
                null,
                null
            )
        ),
        typeString
    );

    Assert.assertEquals(expected, actual);
}
Also used : TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) InputRowParser(io.druid.data.input.impl.InputRowParser) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Example 25 with TimestampSpec

use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class GroupByQueryRunnerFactoryTest method createSegment.

/**
 * Builds a small on-heap incremental index with a single "count" aggregator, feeds it three
 * hard-coded rows through a {@link StringInputRowParser}, hands the index to
 * {@code closerRule}, and wraps it in a segment named "test".
 *
 * @return an {@link IncrementalIndexSegment} over the populated index
 * @throws Exception if a row cannot be parsed or added
 */
private Segment createSegment() throws Exception {
    final IncrementalIndex index = new OnheapIncrementalIndex(
        0,
        Granularities.NONE,
        new AggregatorFactory[] { new CountAggregatorFactory("count") },
        true,
        true,
        true,
        5000
    );

    final CSVParseSpec parseSpec = new CSVParseSpec(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null),
        "\t",
        ImmutableList.of("timestamp", "product", "tags")
    );
    final StringInputRowParser rowParser = new StringInputRowParser(parseSpec, "UTF-8");

    // One row per day, in column order: timestamp, product, tags.
    for (String line : new String[] {
        "2011-01-12T00:00:00.000Z,product_1,t1",
        "2011-01-13T00:00:00.000Z,product_2,t2",
        "2011-01-14T00:00:00.000Z,product_3,t2"
    }) {
        index.add(rowParser.parse(line));
    }

    closerRule.closeLater(index);
    return new IncrementalIndexSegment(index, "test");
}
Also used : CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) CSVParseSpec(io.druid.data.input.impl.CSVParseSpec) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec)

Aggregations

TimestampSpec (io.druid.data.input.impl.TimestampSpec): 40, DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 31, JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 16, StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 16, Test (org.junit.Test): 15, AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12, Map (java.util.Map): 11, DataSchema (io.druid.segment.indexing.DataSchema): 10, UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 10, LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 9, DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 8, DateTime (org.joda.time.DateTime): 8, CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 7, ArrayList (java.util.ArrayList): 7, CSVParseSpec (io.druid.data.input.impl.CSVParseSpec): 6, StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 6, TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec): 6, OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 6, InputRowParser (io.druid.data.input.impl.InputRowParser): 5, MapInputRowParser (io.druid.data.input.impl.MapInputRowParser): 5