Use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.
The class TestDruidRecordWriter, method testWrite.
// This test needs this patch https://github.com/druid-io/druid/pull/3483
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)));
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
  DataSchema dataSchema = new DataSchema(dataSourceName, parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularity.DAY, QueryGranularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      objectMapper);
  RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig(temporaryFolder.newFolder());
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  }, objectMapper);
  Path segmentDescriptroPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20,
      segmentDescriptroPath, localFileSystem);
  List<DruidWritable> druidWritables = Lists.transform(expectedRows,
      new Function<ImmutableMap<String, Object>, DruidWritable>() {
        @Nullable
        @Override
        public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
          return new DruidWritable(ImmutableMap.<String, Object>builder()
              .putAll(input)
              .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                  Granularity.DAY.truncate(new DateTime((long) input.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
              .build());
        }
      });
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getPublishedSegments(segmentDescriptroPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      QueryGranularities.NONE);
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
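Each element of expectedRows above is a plain column-name-to-value map. As a reading aid, here is a minimal sketch of the per-row conversion the Lists.transform performs, using a hypothetical input row; the host name and the numeric values are illustrative rather than the test's actual fixture data, and the same imports as the test are assumed.

  // Hypothetical input row; not the test's real fixture data.
  Map<String, Object> row = ImmutableMap.<String, Object>of(
      DruidTable.DEFAULT_TIMESTAMP_COLUMN, new DateTime("2014-10-22T00:00:00Z").getMillis(),
      "host", "a.example.com",
      "visited_sum", 190L);
  // Copy the row and add the day-truncated timestamp under the granularity column,
  // exactly as the transform above does before handing the record to the writer.
  DruidWritable writable = new DruidWritable(ImmutableMap.<String, Object>builder()
      .putAll(row)
      .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
          Granularity.DAY.truncate(new DateTime((long) row.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
      .build());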
Use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.
The class DruidRecordWriter, method write.
@Override
public void write(Writable w) throws IOException {
  DruidWritable record = (DruidWritable) w;
  final long timestamp = (long) record.getValue().get(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
  final long truncatedTime = (long) record.getValue().get(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
  InputRow inputRow = new MapBasedInputRow(timestamp,
      dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
      record.getValue());
  try {
    appenderator.add(getSegmentIdentifierAndMaybePush(truncatedTime), inputRow, committerSupplier);
  } catch (SegmentNotWritableException e) {
    throw new IOException(e);
  }
}
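As the two casts above show, every DruidWritable handed to this writer must carry both the raw DruidTable.DEFAULT_TIMESTAMP_COLUMN value and the pre-truncated granularity timestamp in its value map. A minimal caller sketch, assuming a DruidRecordWriter already configured as in the test above; the column names follow that test's schema and the concrete values are illustrative.

  // Sketch only: feed one record to an already-constructed DruidRecordWriter.
  long ts = new DateTime("2014-10-22T10:00:00Z").getMillis();
  DruidWritable record = new DruidWritable(ImmutableMap.<String, Object>builder()
      .put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, ts)
      .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
          Granularity.DAY.truncate(new DateTime(ts)).getMillis())
      .put("host", "b.example.com")   // dimension declared in the test's DataSchema
      .put("visited_sum", 10L)        // metric input column; value is illustrative
      .build());
  druidRecordWriter.write(record);
  druidRecordWriter.close(false);     // as in the test above, close(false) publishes the segment descriptors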
Use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.
The class TestDruidSerDe, method deserializeQueryResults.
private static void deserializeQueryResults(DruidSerDe serDe, String queryType, String jsonQuery,
    String resultString, Object[][] records) throws SerDeException, JsonParseException,
    JsonMappingException, IOException, NoSuchFieldException, SecurityException,
    IllegalArgumentException, IllegalAccessException, InterruptedException,
    NoSuchMethodException, InvocationTargetException {
  // Initialize
  Query<?> query = null;
  DruidQueryRecordReader<?, ?> reader = null;
  List<?> resultsList = null;
  ObjectMapper mapper = new DefaultObjectMapper();
  switch (queryType) {
    case Query.TIMESERIES:
      query = mapper.readValue(jsonQuery, TimeseriesQuery.class);
      reader = new DruidTimeseriesQueryRecordReader();
      resultsList = mapper.readValue(resultString,
          new TypeReference<List<Result<TimeseriesResultValue>>>() {
          });
      break;
    case Query.TOPN:
      query = mapper.readValue(jsonQuery, TopNQuery.class);
      reader = new DruidTopNQueryRecordReader();
      resultsList = mapper.readValue(resultString,
          new TypeReference<List<Result<TopNResultValue>>>() {
          });
      break;
    case Query.GROUP_BY:
      query = mapper.readValue(jsonQuery, GroupByQuery.class);
      reader = new DruidGroupByQueryRecordReader();
      resultsList = mapper.readValue(resultString, new TypeReference<List<Row>>() {
      });
      break;
    case Query.SELECT:
      query = mapper.readValue(jsonQuery, SelectQuery.class);
      reader = new DruidSelectQueryRecordReader();
      resultsList = mapper.readValue(resultString,
          new TypeReference<List<Result<SelectResultValue>>>() {
          });
      break;
  }
  // Set query and fields access
  Field field1 = DruidQueryRecordReader.class.getDeclaredField("query");
  field1.setAccessible(true);
  field1.set(reader, query);
  if (reader instanceof DruidGroupByQueryRecordReader) {
    Method method1 = DruidGroupByQueryRecordReader.class.getDeclaredMethod("initExtractors");
    method1.setAccessible(true);
    method1.invoke(reader);
  }
  Field field2 = DruidQueryRecordReader.class.getDeclaredField("results");
  field2.setAccessible(true);
  // Get the row structure
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
  // Check mapred
  Iterator<?> results = resultsList.iterator();
  field2.set(reader, results);
  DruidWritable writable = new DruidWritable();
  int pos = 0;
  while (reader.next(NullWritable.get(), writable)) {
    Object row = serDe.deserialize(writable);
    Object[] expectedFieldsData = records[pos];
    assertEquals(expectedFieldsData.length, fieldRefs.size());
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    pos++;
  }
  assertEquals(pos, records.length);
  // Check mapreduce
  results = resultsList.iterator();
  field2.set(reader, results);
  pos = 0;
  while (reader.nextKeyValue()) {
    Object row = serDe.deserialize(reader.getCurrentValue());
    Object[] expectedFieldsData = records[pos];
    assertEquals(expectedFieldsData.length, fieldRefs.size());
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    pos++;
  }
  assertEquals(pos, records.length);
}
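For context, this helper is driven with a Druid query in JSON form, the broker's JSON response, and the rows expected after deserialization. A hedged, illustrative invocation for the timeseries case might look as follows; serDe is assumed to be a DruidSerDe already initialized with matching table properties, the JSON strings are made up for the example, and buildExpectedTimeseriesRecords() is a hypothetical helper that returns rows matching the SerDe's column types.

  // Illustrative only; not the test's real fixtures.
  String jsonQuery = "{\"queryType\":\"timeseries\",\"dataSource\":\"testDataSource\","
      + "\"granularity\":\"day\",\"intervals\":[\"2014-10-22/2014-10-23\"],"
      + "\"aggregations\":[{\"type\":\"longSum\",\"name\":\"visited_sum\","
      + "\"fieldName\":\"visited_sum\"}]}";
  String resultString = "[{\"timestamp\":\"2014-10-22T00:00:00.000Z\","
      + "\"result\":{\"visited_sum\":190}}]";
  Object[][] records = buildExpectedTimeseriesRecords(); // hypothetical helper
  deserializeQueryResults(serDe, Query.TIMESERIES, jsonQuery, resultString, records);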
Use of org.apache.hadoop.hive.druid.serde.DruidWritable in project hive by apache.
The class TestDruidSerDe, method serializeObject.
private static void serializeObject(Properties properties, DruidSerDe serDe, Object[] rowObject,
    DruidWritable druidWritable) throws SerDeException {
  // Build OI with timestamp granularity column
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  List<ObjectInspector> inspectors = new ArrayList<>();
  columnNames.addAll(Utilities.getColumnNames(properties));
  columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
  columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties),
      new Function<String, PrimitiveTypeInfo>() {
        @Override
        public PrimitiveTypeInfo apply(String type) {
          return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
      }));
  columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
  inspectors.addAll(Lists.transform(columnTypes,
      new Function<PrimitiveTypeInfo, ObjectInspector>() {
        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
        }
      }));
  ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  // Serialize
  DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
  // Check result
  assertEquals(DRUID_WRITABLE.getValue().size(), writable.getValue().size());
  for (Entry<String, Object> e : DRUID_WRITABLE.getValue().entrySet()) {
    assertEquals(e.getValue(), writable.getValue().get(e.getKey()));
  }
}
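The object inspector built here mirrors the table's declared columns plus the extra timestamp-granularity column that the Druid storage handler adds before serialization. A sketch of the kind of Properties a caller might pass; the column names and types are illustrative, and serdeConstants refers to org.apache.hadoop.hive.serde.serdeConstants.

  // Illustrative table properties for serializeObject; the column list and
  // types are made up, not the test's fixture. These are the keys read by
  // Utilities.getColumnNames and Utilities.getColumnTypes in the helper above.
  Properties properties = new Properties();
  properties.setProperty(serdeConstants.LIST_COLUMNS, "__time,host,visited_sum");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp,string,bigint");

These properties would then be paired with a row object and an expected DruidWritable built to match the same column layout.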