
Example 1 with DruidWritable

use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// This test needs this patch https://github.com/druid-io/druid/pull/3483
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularity.DAY, QueryGranularities.NONE, ImmutableList.of(INTERVAL_FULL)), objectMapper);
    RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig(temporaryFolder.newFolder());
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    }, objectMapper);
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = Lists.transform(expectedRows, new Function<ImmutableMap<String, Object>, DruidWritable>() {

        @Nullable
        @Override
        public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
            return new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularity.DAY.truncate(new DateTime((long) input.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build());
        }
    });
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getPublishedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null, QueryGranularities.NONE);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) LocalDataSegmentPusher(io.druid.segment.loading.LocalDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) LocalDataSegmentPuller(io.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(io.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) Firehose(io.druid.data.input.Firehose) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) LocalDataSegmentPusherConfig(io.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) LocalDataSegmentPusher(io.druid.segment.loading.LocalDataSegmentPusher) ImmutableMap(com.google.common.collect.ImmutableMap) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DataSchema(io.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(io.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(io.druid.data.input.InputRow) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) InputRowParser(io.druid.data.input.impl.InputRowParser) File(java.io.File) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Nullable(javax.annotation.Nullable) Ignore(org.junit.Ignore) Test(org.junit.Test)
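
The expectedRows fixture and the verifyRows helper referenced above are defined elsewhere in TestDruidRecordWriter and are not part of this excerpt. As a rough sketch only, assuming the host / visited_sum / unique_hosts schema declared in the DataSchema above (names and values here are illustrative, not the actual fixture), such rows could be built like this:

// Illustrative only: rows keyed by the default Druid timestamp column plus the
// dimension and metric columns declared in the DataSchema above.
List<ImmutableMap<String, Object>> sampleRows = ImmutableList.of(
    ImmutableMap.<String, Object>of(
        DruidTable.DEFAULT_TIMESTAMP_COLUMN, new DateTime("2014-10-22T00:00:00.000Z").getMillis(),
        "host", "a.example.com",
        "visited_sum", 190L,
        "unique_hosts", 1.0d),
    ImmutableMap.<String, Object>of(
        DruidTable.DEFAULT_TIMESTAMP_COLUMN, new DateTime("2014-10-22T01:00:00.000Z").getMillis(),
        "host", "b.example.com",
        "visited_sum", 175L,
        "unique_hosts", 1.0d));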

Example 2 with DruidWritable

use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.

the class DruidRecordWriter method write.

@Override
public void write(Writable w) throws IOException {
    DruidWritable record = (DruidWritable) w;
    final long timestamp = (long) record.getValue().get(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    final long truncatedTime = (long) record.getValue().get(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
    InputRow inputRow = new MapBasedInputRow(timestamp, dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), record.getValue());
    try {
        appenderator.add(getSegmentIdentifierAndMaybePush(truncatedTime), inputRow, committerSupplier);
    } catch (SegmentNotWritableException e) {
        throw new IOException(e);
    }
}
Also used : DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) SegmentNotWritableException(io.druid.segment.realtime.appenderator.SegmentNotWritableException) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) IOException(java.io.IOException)
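
Note that write() assumes the incoming DruidWritable already carries both the raw timestamp column and the pre-truncated granularity column; computing the truncation is the caller's job, as Example 1 does. A minimal, hypothetical caller sketch (the dimension and metric column names and the druidRecordWriter variable are illustrative, not taken from this class):

// Hypothetical caller: the granularity column must be pre-computed before write() is invoked.
final long timestampMillis = new DateTime("2014-10-22T13:45:00.000Z").getMillis(); // illustrative
DruidWritable record = new DruidWritable(ImmutableMap.<String, Object>builder()
    .put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, timestampMillis)
    .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
        Granularity.DAY.truncate(new DateTime(timestampMillis)).getMillis())
    .put("host", "a.example.com")
    .put("visited_sum", 10L)
    .build());
druidRecordWriter.write(record);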

Example 3 with DruidWritable

use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.

the class TestDruidSerDe method deserializeQueryResults.

private static void deserializeQueryResults(DruidSerDe serDe, String queryType, String jsonQuery, String resultString, Object[][] records) throws SerDeException, JsonParseException, JsonMappingException, IOException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, InterruptedException, NoSuchMethodException, InvocationTargetException {
    // Initialize
    Query<?> query = null;
    DruidQueryRecordReader<?, ?> reader = null;
    List<?> resultsList = null;
    ObjectMapper mapper = new DefaultObjectMapper();
    switch(queryType) {
        case Query.TIMESERIES:
            query = mapper.readValue(jsonQuery, TimeseriesQuery.class);
            reader = new DruidTimeseriesQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TimeseriesResultValue>>>() {
            });
            break;
        case Query.TOPN:
            query = mapper.readValue(jsonQuery, TopNQuery.class);
            reader = new DruidTopNQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TopNResultValue>>>() {
            });
            break;
        case Query.GROUP_BY:
            query = mapper.readValue(jsonQuery, GroupByQuery.class);
            reader = new DruidGroupByQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Row>>() {
            });
            break;
        case Query.SELECT:
            query = mapper.readValue(jsonQuery, SelectQuery.class);
            reader = new DruidSelectQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<SelectResultValue>>>() {
            });
            break;
    }
    // Set query and fields access
    Field field1 = DruidQueryRecordReader.class.getDeclaredField("query");
    field1.setAccessible(true);
    field1.set(reader, query);
    if (reader instanceof DruidGroupByQueryRecordReader) {
        Method method1 = DruidGroupByQueryRecordReader.class.getDeclaredMethod("initExtractors");
        method1.setAccessible(true);
        method1.invoke(reader);
    }
    Field field2 = DruidQueryRecordReader.class.getDeclaredField("results");
    field2.setAccessible(true);
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    // Check mapred
    Iterator<?> results = resultsList.iterator();
    field2.set(reader, results);
    DruidWritable writable = new DruidWritable();
    int pos = 0;
    while (reader.next(NullWritable.get(), writable)) {
        Object row = serDe.deserialize(writable);
        Object[] expectedFieldsData = records[pos];
        assertEquals(expectedFieldsData.length, fieldRefs.size());
        for (int i = 0; i < fieldRefs.size(); i++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            assertEquals("Field " + i, expectedFieldsData[i], fieldData);
        }
        pos++;
    }
    assertEquals(pos, records.length);
    // Check mapreduce
    results = resultsList.iterator();
    field2.set(reader, results);
    pos = 0;
    while (reader.nextKeyValue()) {
        Object row = serDe.deserialize(reader.getCurrentValue());
        Object[] expectedFieldsData = records[pos];
        assertEquals(expectedFieldsData.length, fieldRefs.size());
        for (int i = 0; i < fieldRefs.size(); i++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            assertEquals("Field " + i, expectedFieldsData[i], fieldData);
        }
        pos++;
    }
    assertEquals(pos, records.length);
}
Also used : Result(io.druid.query.Result) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Field(java.lang.reflect.Field) GroupByQuery(io.druid.query.groupby.GroupByQuery) TopNQuery(io.druid.query.topn.TopNQuery) TypeReference(com.fasterxml.jackson.core.type.TypeReference) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DruidTopNQueryRecordReader(org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) DruidSelectQueryRecordReader(org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader) Method(java.lang.reflect.Method) SelectQuery(io.druid.query.select.SelectQuery) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) DruidTimeseriesQueryRecordReader(org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) DruidGroupByQueryRecordReader(org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader) Row(io.druid.data.input.Row) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
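
For reference, the resultString fed to this helper follows Druid's native JSON result shape for each query type, which the mapper.readValue calls above bind to Result<TimeseriesResultValue>, Result<TopNResultValue>, Row, or Result<SelectResultValue>. Hypothetical payloads for the timeseries and topN branches (not the actual TestDruidSerDe fixtures) look like this:

// Illustrative Druid result payloads; the actual test fixtures live elsewhere in TestDruidSerDe.
String timeseriesResults =
    "[{\"timestamp\":\"2014-10-01T00:00:00.000Z\","
        + "\"result\":{\"visited_sum\":100,\"unique_hosts\":1.0}}]";
String topNResults =
    "[{\"timestamp\":\"2014-10-01T00:00:00.000Z\","
        + "\"result\":[{\"host\":\"a.example.com\",\"visited_sum\":100}]}]";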

Example 4 with DruidWritable

use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.

the class TestDruidSerDe method serializeObject.

private static void serializeObject(Properties properties, DruidSerDe serDe, Object[] rowObject, DruidWritable druidWritable) throws SerDeException {
    // Build OI with timestamp granularity column
    final List<String> columnNames = new ArrayList<>();
    final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
    List<ObjectInspector> inspectors = new ArrayList<>();
    columnNames.addAll(Utilities.getColumnNames(properties));
    columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
    columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties), new Function<String, PrimitiveTypeInfo>() {

        @Override
        public PrimitiveTypeInfo apply(String type) {
            return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
    }));
    columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
    inspectors.addAll(Lists.transform(columnTypes, new Function<PrimitiveTypeInfo, ObjectInspector>() {

        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
            return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
        }
    }));
    ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    // Serialize
    DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
    // Check result
    assertEquals(DRUID_WRITABLE.getValue().size(), writable.getValue().size());
    for (Entry<String, Object> e : DRUID_WRITABLE.getValue().entrySet()) {
        assertEquals(e.getValue(), writable.getValue().get(e.getKey()));
    }
}
Also used : Function(com.google.common.base.Function) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
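
The rowObject passed in has to line up with the inspector built above: one Hive writable per declared column, in column order, with the extra granularity timestamp last. A hypothetical row for a table declared as (__time timestamp, host string, visited_sum bigint), assuming the real fixtures are defined elsewhere in TestDruidSerDe:

// Illustrative only: field order and types must match columnNames/columnTypes above.
Object[] rowObject = new Object[] {
    new TimestampWritable(new Timestamp(1413936000000L)),  // __time, 2014-10-22T00:00:00Z
    new Text("a.example.com"),                             // host
    new LongWritable(100L),                                // visited_sum
    new TimestampWritable(new Timestamp(1413936000000L))   // day-truncated granularity column
};
DruidWritable serialized = (DruidWritable) serDe.serialize(rowObject, inspector);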

Aggregations

DruidWritable (org.apache.hadoop.hive.druid.serde.DruidWritable) 4
InputRow (io.druid.data.input.InputRow) 2
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 2
TypeReference (com.fasterxml.jackson.core.type.TypeReference) 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1
Function (com.google.common.base.Function) 1
ImmutableMap (com.google.common.collect.ImmutableMap) 1
Firehose (io.druid.data.input.Firehose) 1
MapBasedInputRow (io.druid.data.input.MapBasedInputRow) 1
Row (io.druid.data.input.Row) 1
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec) 1
InputRowParser (io.druid.data.input.impl.InputRowParser) 1
MapInputRowParser (io.druid.data.input.impl.MapInputRowParser) 1
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema) 1
TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec) 1
TimestampSpec (io.druid.data.input.impl.TimestampSpec) 1
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper) 1
Result (io.druid.query.Result) 1
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory) 1
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) 1