Use of io.druid.data.input.InputRow in project druid by druid-io: class RealtimeIndexTaskTest, method testRestoreAfterHandoffAttemptDuringShutdown.
@Test(timeout = 60_000L)
public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception {
  final TaskStorage taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
  final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
  final File directory = tempFolder.newFolder();
  final RealtimeIndexTask task1 = makeRealtimeTask(null);
  final DataSegment publishedSegment;

  // First run:
  {
    final TaskToolbox taskToolbox = makeToolbox(task1, taskStorage, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);

    // Wait for firehose to show up, it starts off null.
    while (task1.getFirehose() == null) {
      Thread.sleep(50);
    }

    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
    firehose.addRows(
        ImmutableList.<InputRow>of(
            new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))
        )
    );

    // Stop the firehose, this will trigger a finishJob.
    firehose.close();

    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
      Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());

    // Do a query.
    Assert.assertEquals(1, sumMetric(task1, "rows"));

    // Trigger graceful shutdown.
    task1.stopGracefully();

    // Wait for the task to finish. The status doesn't really matter.
    while (!statusFuture.isDone()) {
      Thread.sleep(50);
    }
  }

  // Second run:
  {
    final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
    final TaskToolbox taskToolbox = makeToolbox(task2, taskStorage, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);

    // Wait for firehose to show up, it starts off null.
    while (task2.getFirehose() == null) {
      Thread.sleep(50);
    }

    // Stop the firehose again, this will start another handoff.
    final TestFirehose firehose = (TestFirehose) task2.getFirehose();

    // Stop the firehose, this will trigger a finishJob.
    firehose.close();

    // publishedSegment is still published. No reason it shouldn't be.
    Assert.assertEquals(ImmutableSet.of(publishedSegment), mdc.getPublished());

    // Wait for a handoffCallback to show up.
    while (handOffCallbacks.isEmpty()) {
      Thread.sleep(50);
    }

    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
      final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
      Assert.assertEquals(
          new SegmentDescriptor(
              publishedSegment.getInterval(),
              publishedSegment.getVersion(),
              publishedSegment.getShardSpec().getPartitionNum()
          ),
          entry.getKey()
      );
      executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();

    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
  }
}
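Both runs above repeat the same poll-until-ready pattern for the firehose, the published-segment set, and task completion. A minimal sketch of that pattern factored into a helper (hypothetical code, not part of RealtimeIndexTaskTest):

// Hypothetical helper: poll a condition every 50 ms until it holds, as the test
// does for getFirehose(), getPublished(), and statusFuture.isDone().
private static void waitFor(java.util.function.BooleanSupplier condition) throws InterruptedException {
  while (!condition.getAsBoolean()) {
    Thread.sleep(50);
  }
}

// Usage sketch: waitFor(() -> task1.getFirehose() != null);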
Use of io.druid.data.input.InputRow in project druid by druid-io: class RealtimeIndexTaskTest, method testRestoreCorruptData.
@Test(timeout = 60_000L)
public void testRestoreCorruptData() throws Exception {
  final File directory = tempFolder.newFolder();
  final RealtimeIndexTask task1 = makeRealtimeTask(null);

  // First run:
  {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);

    // Wait for firehose to show up, it starts off null.
    while (task1.getFirehose() == null) {
      Thread.sleep(50);
    }

    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
    firehose.addRows(
        ImmutableList.<InputRow>of(
            new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))
        )
    );

    // Trigger graceful shutdown.
    task1.stopGracefully();

    // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());

    // Nothing should be published.
    Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
  }

  // Corrupt the data:
  final File smooshFile = new File(
      String.format(
          "%s/persistent/task/%s/work/persist/%s/%s_%s/0/00000.smoosh",
          directory,
          task1.getId(),
          task1.getDataSource(),
          Granularities.DAY.bucketStart(now),
          Granularities.DAY.bucketEnd(now)
      )
  );
  Files.write(smooshFile.toPath(), "oops!".getBytes(Charsets.UTF_8));

  // Second run:
  {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
    final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);

    // Wait for the task to finish.
    boolean caught = false;
    try {
      statusFuture.get();
    } catch (Exception e) {
      caught = true;
    }
    Assert.assertTrue("expected exception", caught);
  }
}
Use of io.druid.data.input.InputRow in project druid by druid-io: class HyperUniquesSerdeForTest, method getExtractor.
@Override
public ComplexMetricExtractor getExtractor() {
  return new ComplexMetricExtractor() {
    @Override
    public Class<HyperLogLogCollector> extractedClass() {
      return HyperLogLogCollector.class;
    }

    @Override
    public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
      Object rawValue = inputRow.getRaw(metricName);
      if (rawValue instanceof HyperLogLogCollector) {
        // The row already carries a collector; return it as-is.
        return (HyperLogLogCollector) rawValue;
      } else {
        // Otherwise hash the dimension values into a fresh collector.
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        List<String> dimValues = inputRow.getDimension(metricName);
        if (dimValues == null) {
          return collector;
        }
        for (String dimensionValue : dimValues) {
          collector.add(hashFn.hashBytes(StringUtils.toUtf8(dimensionValue)).asBytes());
        }
        return collector;
      }
    }
  };
}
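As a quick illustration, here is a hedged usage sketch (hypothetical code, not part of HyperUniquesSerdeForTest; `serde` is an assumed instance of the class). For a row that carries a plain string dimension rather than a pre-built collector, the else-branch above hashes the dimension values:

// Hypothetical usage sketch; `serde` is assumed to be a HyperUniquesSerdeForTest instance.
ComplexMetricExtractor extractor = serde.getExtractor();
InputRow row = new MapBasedInputRow(
    new DateTime(),                                    // joda-time timestamp, as in the tests above
    ImmutableList.of("dim1"),
    ImmutableMap.<String, Object>of("dim1", "foo")
);
// "foo" is not a HyperLogLogCollector, so it gets hashed into a fresh collector.
Object collector = extractor.extractValue(row, "dim1");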
Use of io.druid.data.input.InputRow in project druid by druid-io: class RocketMQFirehoseFactory, method connect.
@Override
public Firehose connect(ByteBufferInputRowParser byteBufferInputRowParser) throws IOException, ParseException {
  Set<String> newDimExclus = Sets.union(
      byteBufferInputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions(),
      Sets.newHashSet("feed")
  );
  final ByteBufferInputRowParser theParser = byteBufferInputRowParser.withParseSpec(
      byteBufferInputRowParser.getParseSpec().withDimensionsSpec(
          byteBufferInputRowParser.getParseSpec().getDimensionsSpec().withDimensionExclusions(newDimExclus)
      )
  );

  /**
   * Topic-Queue mapping.
   */
  final ConcurrentHashMap<String, Set<MessageQueue>> topicQueueMap;

  /**
   * Default Pull-style client for RocketMQ.
   */
  final DefaultMQPullConsumer defaultMQPullConsumer;
  final DruidPullMessageService pullMessageService;

  messageQueueTreeSetMap.clear();
  windows.clear();

  try {
    defaultMQPullConsumer = new DefaultMQPullConsumer(this.consumerGroup);
    defaultMQPullConsumer.setMessageModel(MessageModel.CLUSTERING);
    topicQueueMap = new ConcurrentHashMap<>();
    pullMessageService = new DruidPullMessageService(defaultMQPullConsumer);
    for (String topic : feed) {
      Validators.checkTopic(topic);
      topicQueueMap.put(topic, defaultMQPullConsumer.fetchSubscribeMessageQueues(topic));
    }
    DruidMessageQueueListener druidMessageQueueListener =
        new DruidMessageQueueListener(Sets.newHashSet(feed), topicQueueMap, defaultMQPullConsumer);
    defaultMQPullConsumer.setMessageQueueListener(druidMessageQueueListener);
    defaultMQPullConsumer.start();
    pullMessageService.start();
  } catch (MQClientException e) {
    LOGGER.error("Failed to start DefaultMQPullConsumer", e);
    throw new IOException("Failed to start RocketMQ client", e);
  }

  return new Firehose() {
    @Override
    public boolean hasMore() {
      boolean hasMore = false;
      DruidPullRequest earliestPullRequest = null;
      for (Map.Entry<String, Set<MessageQueue>> entry : topicQueueMap.entrySet()) {
        for (MessageQueue messageQueue : entry.getValue()) {
          if (JavaCompatUtils.keySet(messageQueueTreeSetMap).contains(messageQueue)
              && !messageQueueTreeSetMap.get(messageQueue).isEmpty()) {
            hasMore = true;
          } else {
            try {
              long offset = defaultMQPullConsumer.fetchConsumeOffset(messageQueue, false);
              int batchSize = (null == pullBatchSize || pullBatchSize.isEmpty())
                  ? DEFAULT_PULL_BATCH_SIZE
                  : Integer.parseInt(pullBatchSize);
              DruidPullRequest newPullRequest =
                  new DruidPullRequest(messageQueue, null, offset, batchSize, !hasMessagesPending());
              // notify pull message service to pull messages from brokers.
              pullMessageService.putRequest(newPullRequest);
              // set the earliest pull in case we need to block.
              if (null == earliestPullRequest) {
                earliestPullRequest = newPullRequest;
              }
            } catch (MQClientException e) {
              LOGGER.error("Failed to fetch consume offset for queue: {}", entry.getKey());
            }
          }
        }
      }

      // Block only when there are no locally pending messages.
      if (!hasMore && null != earliestPullRequest) {
        try {
          earliestPullRequest.getCountDownLatch().await();
          hasMore = true;
        } catch (InterruptedException e) {
          LOGGER.error("CountDownLatch await got interrupted", e);
        }
      }
      return hasMore;
    }

    @Override
    public InputRow nextRow() {
      for (Map.Entry<MessageQueue, ConcurrentSkipListSet<MessageExt>> entry : messageQueueTreeSetMap.entrySet()) {
        if (!entry.getValue().isEmpty()) {
          MessageExt message = entry.getValue().pollFirst();
          InputRow inputRow = theParser.parse(ByteBuffer.wrap(message.getBody()));
          if (!JavaCompatUtils.keySet(windows).contains(entry.getKey())) {
            windows.put(entry.getKey(), new ConcurrentSkipListSet<Long>());
          }
          windows.get(entry.getKey()).add(message.getQueueOffset());
          return inputRow;
        }
      }
      // should never happen.
      throw new RuntimeException("Unexpected Fatal Error! There should have been one row available.");
    }

    @Override
    public Runnable commit() {
      return new Runnable() {
        @Override
        public void run() {
          OffsetStore offsetStore = defaultMQPullConsumer.getOffsetStore();
          Set<MessageQueue> updated = new HashSet<>();
          // calculate offsets according to consuming windows.
          for (ConcurrentHashMap.Entry<MessageQueue, ConcurrentSkipListSet<Long>> entry : windows.entrySet()) {
            while (!entry.getValue().isEmpty()) {
              long offset = offsetStore.readOffset(entry.getKey(), ReadOffsetType.MEMORY_FIRST_THEN_STORE);
              if (offset + 1 > entry.getValue().first()) {
                entry.getValue().pollFirst();
              } else if (offset + 1 == entry.getValue().first()) {
                entry.getValue().pollFirst();
                offsetStore.updateOffset(entry.getKey(), offset + 1, true);
                updated.add(entry.getKey());
              } else {
                break;
              }
            }
          }
          offsetStore.persistAll(updated);
        }
      };
    }

    @Override
    public void close() throws IOException {
      defaultMQPullConsumer.shutdown();
      pullMessageService.shutdown(false);
    }
  };
}
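A caller drives the returned Firehose through the hasMore/nextRow/commit/close cycle defined above. A minimal consumption sketch (hypothetical driver code, not part of the factory; `factory` and `parser` are assumed to be in scope, and in a real ingestion task Druid runs the commit Runnable after a persist rather than per row):

// Hypothetical driver using only the Firehose methods defined above.
Firehose firehose = factory.connect(parser);
try {
  while (firehose.hasMore()) {              // may block until the pull service delivers messages
    InputRow row = firehose.nextRow();
    // ... hand the row to the indexer here ...
    firehose.commit().run();                // illustration only: advances and persists consume offsets
  }
} finally {
  firehose.close();                         // shuts down the pull consumer and the pull message service
}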
Use of io.druid.data.input.InputRow in project druid by druid-io: class DruidParquetInputTest, method testBinaryAsString.
@Test
public void testBinaryAsString() throws IOException, InterruptedException {
  HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromFile(new File("example/impala_hadoop_parquet_job.json"));
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  GenericRecord data = getFirstRecord(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
  InputRow row = config.getParser().parse(data);
  // Without binaryAsString: true, the value would be something like "[104, 101, 121, 32, 116, 104, 105, 115, 32, 105, 115, 3.... ]"
  assertEquals(row.getDimension("field").get(0), "hey this is &é(-è_çà)=^$ù*! Ω^^");
  assertEquals(row.getTimestampFromEpoch(), 1471800234);
}