Search in sources :

Example 1 with TimestampSpec

Use of io.druid.data.input.impl.TimestampSpec in project hive by apache.

From class TestDruidRecordWriter, method testWrite:

// This test needs the patch https://github.com/druid-io/druid/pull/3483
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
        new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
        new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
    DataSchema dataSchema = new DataSchema(
        dataSourceName,
        parserMap,
        new AggregatorFactory[] {
            new LongSumAggregatorFactory("visited_sum", "visited_sum"),
            new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
        },
        new UniformGranularitySpec(Granularity.DAY, QueryGranularities.NONE, ImmutableList.of(INTERVAL_FULL)),
        objectMapper);
    RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig(temporaryFolder.newFolder());
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    }, objectMapper);
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = Lists.transform(expectedRows, new Function<ImmutableMap<String, Object>, DruidWritable>() {

        @Nullable
        @Override
        public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
            return new DruidWritable(ImmutableMap.<String, Object>builder()
                .putAll(input)
                .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                    Granularity.DAY.truncate(new DateTime((long) input.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
                .build());
        }
    });
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getPublishedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
        ImmutableList.of("host"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        null,
        QueryGranularities.NONE);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) LocalDataSegmentPusher(io.druid.segment.loading.LocalDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) LocalDataSegmentPuller(io.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(io.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) Firehose(io.druid.data.input.Firehose) LocalDataSegmentPusherConfig(io.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) ImmutableMap(com.google.common.collect.ImmutableMap) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DataSchema(io.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(io.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(io.druid.data.input.InputRow) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) InputRowParser(io.druid.data.input.impl.InputRowParser) File(java.io.File) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Nullable(javax.annotation.Nullable) Ignore(org.junit.Ignore) Test(org.junit.Test)
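
For context, a minimal sketch of the TimestampSpec call pattern these examples share: the constructor takes the timestamp column, a format ("auto", "iso", "millis", ...), and an optional missing-value default, and extractTimestamp pulls a DateTime out of a row map, as seen in Examples 3 and 4 below. The column name and sample values here are illustrative, not taken from the test above.

import java.util.Map;

import com.google.common.collect.ImmutableMap;
import org.joda.time.DateTime;

import io.druid.data.input.impl.TimestampSpec;

public class TimestampSpecSketch {
    public static void main(String[] args) {
        // "auto" lets the spec accept either ISO strings or millis longs.
        TimestampSpec timestampSpec = new TimestampSpec("__time", "auto", null);
        Map<String, Object> row = ImmutableMap.<String, Object>of(
            "__time", "2011-01-12T00:00:00.000Z",
            "host", "a.example.com");
        DateTime dateTime = timestampSpec.extractTimestamp(row);
        System.out.println(dateTime); // 2011-01-12T00:00:00.000Z
    }
}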

Example 2 with TimestampSpec

Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From class MapVirtualColumnTest, method constructorFeeder:

@Parameterized.Parameters
public static Iterable<Object[]> constructorFeeder() throws IOException {
    final Supplier<SelectQueryConfig> selectConfigSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));
    SelectQueryRunnerFactory factory = new SelectQueryRunnerFactory(
        new SelectQueryQueryToolChest(
            new DefaultObjectMapper(),
            QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(),
            selectConfigSupplier),
        new SelectQueryEngine(selectConfigSupplier),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis())
        .withQueryGranularity(Granularities.NONE)
        .build();
    final IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 10000);
    final StringInputRowParser parser = new StringInputRowParser(
        new DelimitedParseSpec(
            new TimestampSpec("ts", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values")), null, null),
            "\t",
            ",",
            Arrays.asList("ts", "dim", "keys", "values")),
        "utf8");
    CharSource input = CharSource.wrap("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3\n" + "2011-01-12T00:00:00.000Z\tb\tkey4,key5,key6\tvalue4\n" + "2011-01-12T00:00:00.000Z\tc\tkey1,key5\tvalue1,value5,value9\n");
    IncrementalIndex index1 = TestIndex.loadIncrementalIndex(index, input, parser);
    QueryableIndex index2 = TestIndex.persistRealtimeAndLoadMMapped(index1);
    return transformToConstructionFeeder(Arrays.asList(
        makeQueryRunner(factory, "index1", new IncrementalIndexSegment(index1, "index1"), "incremental"),
        makeQueryRunner(factory, "index2", new QueryableIndexSegment("index2", index2), "queryable")));
}
Also used : CharSource(com.google.common.io.CharSource) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) DelimitedParseSpec(io.druid.data.input.impl.DelimitedParseSpec) SelectQueryRunnerFactory(io.druid.query.select.SelectQueryRunnerFactory) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) DateTime(org.joda.time.DateTime) SelectQueryQueryToolChest(io.druid.query.select.SelectQueryQueryToolChest) SelectQueryEngine(io.druid.query.select.SelectQueryEngine) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema)
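
As a quick illustration of what the DelimitedParseSpec above produces for one of those tab-separated lines, here is a hedged sketch; it assumes this Druid version's StringInputRowParser exposes the parse(String) convenience method:

import java.util.Arrays;

import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DelimitedParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimestampSpec;

public class DelimitedParseSketch {
    public static void main(String[] args) {
        StringInputRowParser parser = new StringInputRowParser(
            new DelimitedParseSpec(
                new TimestampSpec("ts", "iso", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values")), null, null),
                "\t", // column delimiter
                ",",  // list delimiter for multi-value columns
                Arrays.asList("ts", "dim", "keys", "values")),
            "utf8");
        InputRow row = parser.parse("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3");
        System.out.println(row.getTimestamp());       // 2011-01-12T00:00:00.000Z
        System.out.println(row.getDimension("keys")); // [key1, key2, key3]
    }
}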

Example 3 with TimestampSpec

Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From class AvroStreamInputRowParser, method parseGenericRecord:

protected static InputRow parseGenericRecord(GenericRecord record, ParseSpec parseSpec, List<String> dimensions, boolean fromPigAvroStorage, boolean binaryAsString) {
    GenericRecordAsMap genericRecordAsMap = new GenericRecordAsMap(record, fromPigAvroStorage, binaryAsString);
    TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
    DateTime dateTime = timestampSpec.extractTimestamp(genericRecordAsMap);
    return new MapBasedInputRow(dateTime, dimensions, genericRecordAsMap);
}
Also used : GenericRecordAsMap(io.druid.data.input.avro.GenericRecordAsMap) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DateTime(org.joda.time.DateTime)
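
The pattern here is generic: any Map view of a record plus the ParseSpec's TimestampSpec is enough to build a MapBasedInputRow. A minimal sketch of the same three steps with a plain map instead of an Avro GenericRecord (field names and values are illustrative only):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.TimestampSpec;

public class MapBasedRowSketch {
    public static void main(String[] args) {
        Map<String, Object> event = new HashMap<>();
        event.put("timestamp", "2011-01-12T00:00:00.000Z");
        event.put("host", "a.example.com");
        event.put("visited_sum", 190L);

        TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
        InputRow row = new MapBasedInputRow(
            timestampSpec.extractTimestamp(event), // row timestamp
            Arrays.asList("host"),                 // dimension columns
            event);                                // full event map, metrics included
        System.out.println(row.getDimension("host"));  // [a.example.com]
        System.out.println(row.getRaw("visited_sum")); // 190
    }
}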

Example 4 with TimestampSpec

Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From class OrcHadoopInputRowParser, method parse:

@Override
public InputRow parse(OrcStruct input) {
    Map<String, Object> map = Maps.newHashMap();
    List<? extends StructField> fields = oip.getAllStructFieldRefs();
    for (StructField field : fields) {
        ObjectInspector objectInspector = field.getFieldObjectInspector();
        switch(objectInspector.getCategory()) {
            case PRIMITIVE:
                PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) objectInspector;
                map.put(field.getFieldName(), primitiveObjectInspector.getPrimitiveJavaObject(oip.getStructFieldData(input, field)));
                break;
            // array case - only 1-depth arrays are supported so far
            case LIST:
                ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
                map.put(field.getFieldName(), getListObject(listObjectInspector, oip.getStructFieldData(input, field)));
                break;
            default:
                break;
        }
    }
    TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
    DateTime dateTime = timestampSpec.extractTimestamp(map);
    return new MapBasedInputRow(dateTime, dimensions, map);
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TimestampSpec(io.druid.data.input.impl.TimestampSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime)
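
The getListObject(...) helper called above is not shown in this snippet. A hypothetical sketch of what such a helper could look like, unwrapping a 1-depth ORC list whose elements are primitive (the project's actual implementation may differ):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

final class ListUnwrapSketch {
    static List<Object> getListObject(ListObjectInspector listInspector, Object listData) {
        List<?> rawList = listInspector.getList(listData);
        ObjectInspector elementInspector = listInspector.getListElementObjectInspector();
        if (rawList == null || elementInspector.getCategory() != ObjectInspector.Category.PRIMITIVE) {
            // nested lists/structs are outside the "1-depth array" case handled above
            return null;
        }
        PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) elementInspector;
        List<Object> result = new ArrayList<>();
        for (Object element : rawList) {
            result.add(primitiveInspector.getPrimitiveJavaObject(element));
        }
        return result;
    }
}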

Example 5 with TimestampSpec

Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From class OrcHadoopInputRowParserTest, method testTypeFromParseSpec:

@Test
public void testTypeFromParseSpec() {
    ParseSpec parseSpec = new TimeAndDimsParseSpec(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(
            ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")),
            null,
            null));
    String typeString = OrcHadoopInputRowParser.typeStringFromParseSpec(parseSpec);
    String expected = "struct<timestamp:string,col1:string,col2:string>";
    Assert.assertEquals(expected, typeString);
}
Also used : TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) ParseSpec(io.druid.data.input.impl.ParseSpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) Test(org.junit.Test)
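
For reference, a rough sketch of how a type string like the expected value above could be assembled from a ParseSpec. This is not the project's typeStringFromParseSpec; it assumes the getTimestampColumn() and getDimensionNames() accessors and maps every column to an ORC string type:

import io.druid.data.input.impl.ParseSpec;

final class TypeStringSketch {
    static String typeStringFromParseSpec(ParseSpec parseSpec) {
        // struct<timestampColumn:string,dim1:string,dim2:string,...>
        StringBuilder builder = new StringBuilder("struct<");
        builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string");
        for (String dimension : parseSpec.getDimensionsSpec().getDimensionNames()) {
            builder.append(',').append(dimension).append(":string");
        }
        return builder.append('>').toString();
    }
}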

Aggregations

TimestampSpec (io.druid.data.input.impl.TimestampSpec): 40 usages
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 31 usages
JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 16 usages
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 16 usages
Test (org.junit.Test): 15 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12 usages
Map (java.util.Map): 11 usages
DataSchema (io.druid.segment.indexing.DataSchema): 10 usages
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 10 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 9 usages
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 8 usages
DateTime (org.joda.time.DateTime): 8 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 7 usages
ArrayList (java.util.ArrayList): 7 usages
CSVParseSpec (io.druid.data.input.impl.CSVParseSpec): 6 usages
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 6 usages
TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec): 6 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 6 usages
InputRowParser (io.druid.data.input.impl.InputRowParser): 5 usages
MapInputRowParser (io.druid.data.input.impl.MapInputRowParser): 5 usages