use of io.druid.query.timeboundary.TimeBoundaryQuery in project hive by apache.
the class DruidQueryBasedInputFormat method splitSelectQuery.
/* Method that splits Select query depending on the threshold so read can be
* parallelized. We will only contact the Druid broker to obtain all results. */
private static HiveDruidSplit[] splitSelectQuery(Configuration conf, String address, SelectQuery query, Path dummyPath) throws IOException {
final int selectThreshold = (int) HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_THRESHOLD);
final int numConnection = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
final Period readTimeout = new Period(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
if (isFetch) {
// If it has a limit, we use it and we do not split the query
return new HiveDruidSplit[] { new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath, new String[] { address }) };
}
// We do not have the number of rows, thus we need to execute a
// Segment Metadata query to obtain number of rows
SegmentMetadataQueryBuilder metadataBuilder = new Druids.SegmentMetadataQueryBuilder();
metadataBuilder.dataSource(query.getDataSource());
metadataBuilder.intervals(query.getIntervals());
metadataBuilder.merge(true);
metadataBuilder.analysisTypes();
SegmentMetadataQuery metadataQuery = metadataBuilder.build();
Lifecycle lifecycle = new Lifecycle();
HttpClient client = HttpClientInit.createClient(HttpClientConfig.builder().withNumConnections(numConnection).withReadTimeout(readTimeout.toStandardDuration()).build(), lifecycle);
try {
lifecycle.start();
} catch (Exception e) {
LOG.error("Lifecycle start issue");
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
InputStream response;
try {
response = DruidStorageHandlerUtils.submitRequest(client, DruidStorageHandlerUtils.createRequest(address, metadataQuery));
} catch (Exception e) {
lifecycle.stop();
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
// Retrieve results
List<SegmentAnalysis> metadataList;
try {
metadataList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response, new TypeReference<List<SegmentAnalysis>>() {
});
} catch (Exception e) {
response.close();
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
} finally {
lifecycle.stop();
}
if (metadataList == null) {
throw new IOException("Connected to Druid but could not retrieve datasource information");
}
if (metadataList.isEmpty()) {
// There are no rows for that time range, we can submit query as it is
return new HiveDruidSplit[] { new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath, new String[] { address }) };
}
if (metadataList.size() != 1) {
throw new IOException("Information about segments should have been merged");
}
final long numRows = metadataList.get(0).getNumRows();
query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
if (numRows <= selectThreshold) {
// We are not going to split it
return new HiveDruidSplit[] { new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath, new String[] { address }) };
}
// If the query does not specify a timestamp, we obtain the total time using
// a Time Boundary query. Then, we use the information to split the query
// following the Select threshold configuration property
final List<Interval> intervals = new ArrayList<>();
if (query.getIntervals().size() == 1 && query.getIntervals().get(0).withChronology(ISOChronology.getInstanceUTC()).equals(DruidTable.DEFAULT_INTERVAL)) {
// Default max and min, we should execute a time boundary query to get a
// more precise range
TimeBoundaryQueryBuilder timeBuilder = new Druids.TimeBoundaryQueryBuilder();
timeBuilder.dataSource(query.getDataSource());
TimeBoundaryQuery timeQuery = timeBuilder.build();
lifecycle = new Lifecycle();
client = HttpClientInit.createClient(HttpClientConfig.builder().withNumConnections(numConnection).withReadTimeout(readTimeout.toStandardDuration()).build(), lifecycle);
try {
lifecycle.start();
} catch (Exception e) {
LOG.error("Lifecycle start issue");
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
try {
response = DruidStorageHandlerUtils.submitRequest(client, DruidStorageHandlerUtils.createRequest(address, timeQuery));
} catch (Exception e) {
lifecycle.stop();
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
// Retrieve results
List<Result<TimeBoundaryResultValue>> timeList;
try {
timeList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response, new TypeReference<List<Result<TimeBoundaryResultValue>>>() {
});
} catch (Exception e) {
response.close();
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
} finally {
lifecycle.stop();
}
if (timeList == null || timeList.isEmpty()) {
throw new IOException("Connected to Druid but could not retrieve time boundary information");
}
if (timeList.size() != 1) {
throw new IOException("We should obtain a single time boundary");
}
intervals.add(new Interval(timeList.get(0).getValue().getMinTime().getMillis(), timeList.get(0).getValue().getMaxTime().getMillis(), ISOChronology.getInstanceUTC()));
} else {
intervals.addAll(query.getIntervals());
}
// Create (numRows/default threshold) input splits
int numSplits = (int) Math.ceil((double) numRows / selectThreshold);
List<List<Interval>> newIntervals = createSplitsIntervals(intervals, numSplits);
HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
for (int i = 0; i < numSplits; i++) {
// Create partial Select query
final SelectQuery partialQuery = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(newIntervals.get(i)));
splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, new String[] { address });
}
return splits;
}
use of io.druid.query.timeboundary.TimeBoundaryQuery in project druid by druid-io.
the class CachingClusteredClientTest method testIfNoneMatch.
@Test
public void testIfNoneMatch() throws Exception {
Interval interval = new Interval("2016/2017");
final DataSegment dataSegment = new DataSegment("dataSource", interval, "ver", ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp"), ImmutableList.of("product"), ImmutableList.of("visited_sum"), NoneShardSpec.instance(), 9, 12334);
final ServerSelector selector = new ServerSelector(dataSegment, new HighestPriorityTierSelectorStrategy(new RandomServerSelectorStrategy()));
selector.addServerAndUpdateSegment(new QueryableDruidServer(servers[0], null), dataSegment);
timeline.add(interval, "ver", new SingleElementPartitionChunk<>(selector));
TimeBoundaryQuery query = Druids.newTimeBoundaryQueryBuilder().dataSource(DATA_SOURCE).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))).context(ImmutableMap.<String, Object>of("If-None-Match", "aVJV29CJY93rszVW/QBy0arWZo0=")).build();
Map<String, String> responseContext = new HashMap<>();
client.run(query, responseContext);
Assert.assertEquals("Z/eS4rQz5v477iq7Aashr6JPZa0=", responseContext.get("ETag"));
}
use of io.druid.query.timeboundary.TimeBoundaryQuery in project druid by druid-io.
the class DirectDruidClientTest method testCancel.
@Test
public void testCancel() throws Exception {
HttpClient httpClient = EasyMock.createStrictMock(HttpClient.class);
Capture<Request> capturedRequest = EasyMock.newCapture();
ListenableFuture<Object> cancelledFuture = Futures.immediateCancelledFuture();
SettableFuture<Object> cancellationFuture = SettableFuture.create();
EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.<HttpResponseHandler>anyObject())).andReturn(cancelledFuture).once();
EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.<HttpResponseHandler>anyObject())).andReturn(cancellationFuture).once();
EasyMock.replay(httpClient);
final ServerSelector serverSelector = new ServerSelector(new DataSegment("test", new Interval("2013-01-01/2013-01-02"), new DateTime("2013-01-01").toString(), Maps.<String, Object>newHashMap(), Lists.<String>newArrayList(), Lists.<String>newArrayList(), NoneShardSpec.instance(), 0, 0L), new HighestPriorityTierSelectorStrategy(new ConnectionCountServerSelectorStrategy()));
DirectDruidClient client1 = new DirectDruidClient(new ReflectionQueryToolChestWarehouse(), QueryRunnerTestHelper.NOOP_QUERYWATCHER, new DefaultObjectMapper(), httpClient, "foo", new NoopServiceEmitter());
QueryableDruidServer queryableDruidServer1 = new QueryableDruidServer(new DruidServer("test1", "localhost", 0, "historical", DruidServer.DEFAULT_TIER, 0), client1);
serverSelector.addServerAndUpdateSegment(queryableDruidServer1, serverSelector.getSegment());
TimeBoundaryQuery query = Druids.newTimeBoundaryQueryBuilder().dataSource("test").build();
HashMap<String, List> context = Maps.newHashMap();
cancellationFuture.set(new StatusResponseHolder(HttpResponseStatus.OK, new StringBuilder("cancelled")));
Sequence results = client1.run(query, context);
Assert.assertEquals(HttpMethod.DELETE, capturedRequest.getValue().getMethod());
Assert.assertEquals(0, client1.getNumOpenConnections());
QueryInterruptedException exception = null;
try {
Sequences.toList(results, Lists.newArrayList());
} catch (QueryInterruptedException e) {
exception = e;
}
Assert.assertNotNull(exception);
EasyMock.verify(httpClient);
}
use of io.druid.query.timeboundary.TimeBoundaryQuery in project druid by druid-io.
the class TimewarpOperator method postProcess.
public QueryRunner<T> postProcess(final QueryRunner<T> baseRunner, final long now) {
return new QueryRunner<T>() {
@Override
public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
final long offset = computeOffset(now);
final Interval interval = query.getIntervals().get(0);
final Interval modifiedInterval = new Interval(Math.min(interval.getStartMillis() + offset, now + offset), Math.min(interval.getEndMillis() + offset, now + offset));
return Sequences.map(baseRunner.run(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Arrays.asList(modifiedInterval))), responseContext), new Function<T, T>() {
@Override
public T apply(T input) {
if (input instanceof Result) {
Result res = (Result) input;
Object value = res.getValue();
if (value instanceof TimeBoundaryResultValue) {
TimeBoundaryResultValue boundary = (TimeBoundaryResultValue) value;
DateTime minTime = null;
try {
minTime = boundary.getMinTime();
} catch (IllegalArgumentException e) {
}
final DateTime maxTime = boundary.getMaxTime();
return (T) ((TimeBoundaryQuery) query).buildResult(new DateTime(Math.min(res.getTimestamp().getMillis() - offset, now)), minTime != null ? minTime.minus(offset) : null, maxTime != null ? new DateTime(Math.min(maxTime.getMillis() - offset, now)) : null).iterator().next();
}
return (T) new Result(res.getTimestamp().minus(offset), value);
} else if (input instanceof MapBasedRow) {
MapBasedRow row = (MapBasedRow) input;
return (T) new MapBasedRow(row.getTimestamp().minus(offset), row.getEvent());
}
// default to noop for unknown result types
return input;
}
});
}
};
}
use of io.druid.query.timeboundary.TimeBoundaryQuery in project druid by druid-io.
the class AppendTest method testTimeBoundary2.
@Test
public void testTimeBoundary2() {
List<Result<TimeBoundaryResultValue>> expectedResults = Arrays.asList(new Result<TimeBoundaryResultValue>(new DateTime("2011-01-12T00:00:00.000Z"), new TimeBoundaryResultValue(ImmutableMap.of(TimeBoundaryQuery.MIN_TIME, new DateTime("2011-01-12T00:00:00.000Z"), TimeBoundaryQuery.MAX_TIME, new DateTime("2011-01-15T00:00:00.000Z")))));
TimeBoundaryQuery query = Druids.newTimeBoundaryQueryBuilder().dataSource(dataSource).build();
QueryRunner runner = TestQueryRunners.makeTimeBoundaryQueryRunner(segment2);
HashMap<String, Object> context = new HashMap<String, Object>();
TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
}
Aggregations