Search in sources :

Example 1 with DruidRecordWriter

use of org.apache.hadoop.hive.druid.io.DruidRecordWriter in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// This test need this patch https://github.com/druid-io/druid/pull/3483
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularity.DAY, QueryGranularities.NONE, ImmutableList.of(INTERVAL_FULL)), objectMapper);
    RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig(temporaryFolder.newFolder());
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    }, objectMapper);
    Path segmentDescriptroPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptroPath, localFileSystem);
    List<DruidWritable> druidWritables = Lists.transform(expectedRows, new Function<ImmutableMap<String, Object>, DruidWritable>() {

        @Nullable
        @Override
        public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
            return new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularity.DAY.truncate(new DateTime((long) input.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build());
        }
    });
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getPublishedSegments(segmentDescriptroPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null, QueryGranularities.NONE);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) LocalDataSegmentPusher(io.druid.segment.loading.LocalDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) LocalDataSegmentPuller(io.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(io.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) Firehose(io.druid.data.input.Firehose) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) LocalDataSegmentPusherConfig(io.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) LocalDataSegmentPusher(io.druid.segment.loading.LocalDataSegmentPusher) ImmutableMap(com.google.common.collect.ImmutableMap) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DataSchema(io.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(io.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(io.druid.data.input.InputRow) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) InputRowParser(io.druid.data.input.impl.InputRowParser) File(java.io.File) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Nullable(javax.annotation.Nullable) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

ImmutableMap (com.google.common.collect.ImmutableMap)1 Firehose (io.druid.data.input.Firehose)1 InputRow (io.druid.data.input.InputRow)1 DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)1 InputRowParser (io.druid.data.input.impl.InputRowParser)1 MapInputRowParser (io.druid.data.input.impl.MapInputRowParser)1 StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema)1 TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec)1 TimestampSpec (io.druid.data.input.impl.TimestampSpec)1 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)1 HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)1 QueryableIndex (io.druid.segment.QueryableIndex)1 QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter)1 DataSchema (io.druid.segment.indexing.DataSchema)1 RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig)1 UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec)1 DataSegmentPusher (io.druid.segment.loading.DataSegmentPusher)1 LocalDataSegmentPuller (io.druid.segment.loading.LocalDataSegmentPuller)1 LocalDataSegmentPusher (io.druid.segment.loading.LocalDataSegmentPusher)1 LocalDataSegmentPusherConfig (io.druid.segment.loading.LocalDataSegmentPusherConfig)1