Use of io.druid.java.util.common.ISE in project druid by druid-io.
From the class KafkaLookupExtractorFactory, method start.
@Override
public boolean start() {
  synchronized (started) {
    if (started.get()) {
      LOG.warn("Already started, not starting again");
      return started.get();
    }
    if (executorService.isShutdown()) {
      LOG.warn("Already shut down, not starting again");
      return false;
    }
    final Properties kafkaProperties = new Properties();
    kafkaProperties.putAll(getKafkaProperties());
    if (kafkaProperties.containsKey("group.id")) {
      throw new IAE("Cannot set kafka property [group.id]. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty("group.id"));
    }
    if (kafkaProperties.containsKey("auto.offset.reset")) {
      throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
    }
    Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
    kafkaProperties.setProperty("group.id", factoryId);
    final String topic = getKafkaTopic();
    LOG.debug("About to listen to topic [%s] with group.id [%s]", topic, factoryId);
    cacheHandler = cacheManager.createCache();
    final Map<String, String> map = cacheHandler.getCache();
    mapRef.set(map);
    // Enable publish-subscribe
    kafkaProperties.setProperty("auto.offset.reset", "smallest");
    final CountDownLatch startingReads = new CountDownLatch(1);
    final ListenableFuture<?> future = executorService.submit(new Runnable() {
      @Override
      public void run() {
        while (!executorService.isShutdown()) {
          consumerConnector = buildConnector(kafkaProperties);
          try {
            if (executorService.isShutdown()) {
              break;
            }
            final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
            if (streams == null || streams.isEmpty()) {
              throw new IAE("Topic [%s] had no streams", topic);
            }
            if (streams.size() > 1) {
              throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
            }
            final KafkaStream<String, String> kafkaStream = streams.get(0);
            startingReads.countDown();
            for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
              final String key = messageAndMetadata.key();
              final String message = messageAndMetadata.message();
              if (key == null || message == null) {
                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                continue;
              }
              doubleEventCount.incrementAndGet();
              map.put(key, message);
              doubleEventCount.incrementAndGet();
              LOG.trace("Placed key[%s] val[%s]", key, message);
            }
          } catch (Exception e) {
            LOG.error(e, "Error reading stream for topic [%s]", topic);
          } finally {
            consumerConnector.shutdown();
          }
        }
      }
    });
    Futures.addCallback(future, new FutureCallback<Object>() {
      @Override
      public void onSuccess(Object result) {
        LOG.debug("Success listening to [%s]", topic);
      }

      @Override
      public void onFailure(Throwable t) {
        if (t instanceof CancellationException) {
          LOG.debug("Topic [%s] cancelled", topic);
        } else {
          LOG.error(t, "Error in listening to [%s]", topic);
        }
      }
    }, MoreExecutors.sameThreadExecutor());
    this.future = future;
    final Stopwatch stopwatch = Stopwatch.createStarted();
    try {
      while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
        // Don't return until we have actually connected
        if (future.isDone()) {
          future.get();
        } else {
          if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
            throw new TimeoutException("Failed to connect to kafka in sufficient time");
          }
        }
      }
    } catch (InterruptedException | ExecutionException | TimeoutException e) {
      executorService.shutdown();
      if (!future.isDone() && !future.cancel(false)) {
        LOG.warn("Could not cancel kafka listening thread");
      }
      LOG.error(e, "Failed to start kafka extraction factory");
      cacheHandler.close();
      return false;
    }
    started.set(true);
    return true;
  }
}
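
In this snippet ISE and IAE behave as format-string exceptions: the factory throws IAE when a caller supplies Kafka properties the factory manages itself, and ISE when the topic unexpectedly yields more than one stream. Below is a minimal sketch of that guard pattern, assuming only what the calls above already show (both classes live in io.druid.java.util.common and accept a String.format-style message followed by arguments); the class and method names here are illustrative, not Druid API.

import io.druid.java.util.common.IAE;
import io.druid.java.util.common.ISE;

import java.util.List;
import java.util.Properties;

class StreamGuards {
  // Caller handed us configuration the factory controls itself: an argument problem, so IAE.
  static void validateProperties(Properties props) {
    if (props.containsKey("group.id")) {
      throw new IAE("Cannot set kafka property [group.id]. Found [%s]", props.getProperty("group.id"));
    }
  }

  // The broker returned an unexpected number of streams: a state problem, so ISE.
  static <T> T exactlyOne(String topic, List<T> streams) {
    if (streams == null || streams.isEmpty()) {
      throw new IAE("Topic [%s] had no streams", topic);
    }
    if (streams.size() > 1) {
      throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
    }
    return streams.get(0);
  }
}
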
Use of io.druid.java.util.common.ISE in project druid by druid-io.
From the class HdfsStorageAuthentication, method authenticate.
/**
 * Authenticates against a secured Hadoop cluster.
 * In case of any bug fix, make sure to fix the code in JobHelper#authenticate as well.
 */
@LifecycleStart
public void authenticate() {
  String principal = hdfsKerberosConfig.getPrincipal();
  String keytab = hdfsKerberosConfig.getKeytab();
  if (!Strings.isNullOrEmpty(principal) && !Strings.isNullOrEmpty(keytab)) {
    UserGroupInformation.setConfiguration(hadoopConf);
    if (UserGroupInformation.isSecurityEnabled()) {
      try {
        if (UserGroupInformation.getCurrentUser().hasKerberosCredentials() == false || !UserGroupInformation.getCurrentUser().getUserName().equals(principal)) {
          log.info("Trying to authenticate user [%s] with keytab [%s]..", principal, keytab);
          UserGroupInformation.loginUserFromKeytab(principal, keytab);
        }
      } catch (IOException e) {
        throw new ISE(e, "Failed to authenticate user principal [%s] with keytab [%s]", principal, keytab);
      }
    }
  }
}
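
Here ISE wraps the checked IOException from the Kerberos login into an unchecked failure during lifecycle start, using the cause-plus-format constructor shown above (ISE(Throwable, String, Object...)). A minimal sketch of that wrapping pattern; loginFromKeytab is a hypothetical stand-in for UserGroupInformation.loginUserFromKeytab.

import io.druid.java.util.common.ISE;

import java.io.IOException;

class KerberosLogin {
  void login(String principal, String keytab) {
    try {
      loginFromKeytab(principal, keytab);
    } catch (IOException e) {
      // Preserve the cause while turning the checked exception into an unchecked one.
      throw new ISE(e, "Failed to authenticate user principal [%s] with keytab [%s]", principal, keytab);
    }
  }

  private void loginFromKeytab(String principal, String keytab) throws IOException {
    // placeholder for the real Kerberos login call
  }
}
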
Use of io.druid.java.util.common.ISE in project druid by druid-io.
From the class JobHelper, method runJobs.
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
  String failedMessage = null;
  for (Jobby job : jobs) {
    if (failedMessage == null) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
  }
  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath.getFileSystem(injectSystemProperties(new Configuration())).delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }
  if (failedMessage != null) {
    throw new ISE(failedMessage);
  }
  return true;
}
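
runJobs iterates the list of Jobby instances, remembers the first failure message, still performs intermediate-path cleanup, and only then throws ISE. A minimal sketch of the same pattern, using a hypothetical Step interface in place of Jobby (all names in this sketch are illustrative, not Druid's API):

import io.druid.java.util.common.ISE;

import java.util.Arrays;
import java.util.List;

class RunAllOrThrow {
  interface Step {
    boolean run();
  }

  static boolean runAll(List<Step> steps) {
    String failedMessage = null;
    for (Step step : steps) {
      // Remember only the first failure but keep going so later cleanup still runs.
      if (failedMessage == null && !step.run()) {
        failedMessage = String.format("Step[%s] failed!", step.getClass());
      }
    }
    // ... cleanup would happen here, regardless of failure ...
    if (failedMessage != null) {
      // The message is pre-formatted, so no varargs are passed to ISE.
      throw new ISE(failedMessage);
    }
    return true;
  }

  public static void main(String[] args) {
    runAll(Arrays.asList(() -> true, () -> true));
  }
}
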
Use of io.druid.java.util.common.ISE in project druid by druid-io.
From the class HyperLogLogCollector, method fold.
public HyperLogLogCollector fold(HyperLogLogCollector other) {
  if (other == null || other.storageBuffer.remaining() == 0) {
    return this;
  }
  if (storageBuffer.isReadOnly()) {
    convertToMutableByteBuffer();
  }
  if (storageBuffer.remaining() != getNumBytesForDenseStorage()) {
    convertToDenseStorage();
  }
  estimatedCardinality = null;
  if (getRegisterOffset() < other.getRegisterOffset()) {
    // "Swap" the buffers so that we are folding into the one with the higher offset
    final ByteBuffer tmpBuffer = ByteBuffer.allocate(storageBuffer.remaining());
    tmpBuffer.put(storageBuffer.asReadOnlyBuffer());
    tmpBuffer.clear();
    storageBuffer.duplicate().put(other.storageBuffer.asReadOnlyBuffer());
    other = HyperLogLogCollector.makeCollector(tmpBuffer);
  }
  final ByteBuffer otherBuffer = other.storageBuffer;
  // Save position and restore later to avoid allocations due to duplicating the otherBuffer object.
  final int otherPosition = otherBuffer.position();
  try {
    final byte otherOffset = other.getRegisterOffset();
    byte myOffset = getRegisterOffset();
    short numNonZero = getNumNonZeroRegisters();
    final int offsetDiff = myOffset - otherOffset;
    if (offsetDiff < 0) {
      throw new ISE("offsetDiff[%d] < 0, shouldn't happen because of swap.", offsetDiff);
    }
    final int myPayloadStart = getPayloadBytePosition();
    otherBuffer.position(other.getPayloadBytePosition());
    if (isSparse(otherBuffer)) {
      while (otherBuffer.hasRemaining()) {
        final int payloadStartPosition = otherBuffer.getShort() - other.getNumHeaderBytes();
        numNonZero += mergeAndStoreByteRegister(storageBuffer, myPayloadStart + payloadStartPosition, offsetDiff, otherBuffer.get());
      }
      if (numNonZero == NUM_BUCKETS) {
        numNonZero = decrementBuckets();
        setRegisterOffset(++myOffset);
        setNumNonZeroRegisters(numNonZero);
      }
    } else {
      // dense
      int position = getPayloadBytePosition();
      while (otherBuffer.hasRemaining()) {
        numNonZero += mergeAndStoreByteRegister(storageBuffer, position, offsetDiff, otherBuffer.get());
        position++;
      }
      if (numNonZero == NUM_BUCKETS) {
        numNonZero = decrementBuckets();
        setRegisterOffset(++myOffset);
        setNumNonZeroRegisters(numNonZero);
      }
    }
    // no need to call setRegisterOffset(myOffset) here, since it gets updated every time myOffset is incremented
    setNumNonZeroRegisters(numNonZero);
    // this will add the max overflow and also recheck if offset needs to be shifted
    add(other.getMaxOverflowRegister(), other.getMaxOverflowValue());
    return this;
  } finally {
    otherBuffer.position(otherPosition);
  }
}
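
fold merges another collector's registers into this one, always folding into the buffer with the larger register offset and throwing ISE if that invariant is somehow violated after the swap. A rough usage sketch follows; it assumes the factory and add/estimate methods commonly seen alongside fold (makeLatestCollector(), add(byte[]), estimateCardinality()) and the io.druid.hll package location, so treat those names as assumptions rather than a verified API.

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;  // package has moved between Druid versions; adjust if needed

import java.nio.charset.StandardCharsets;

class FoldExample {
  public static void main(String[] args) {
    final HashFunction fn = Hashing.murmur3_128();

    HyperLogLogCollector left = HyperLogLogCollector.makeLatestCollector();
    HyperLogLogCollector right = HyperLogLogCollector.makeLatestCollector();

    // Collectors consume 128-bit hashes of the raw values.
    left.add(fn.hashString("user-a", StandardCharsets.UTF_8).asBytes());
    right.add(fn.hashString("user-b", StandardCharsets.UTF_8).asBytes());

    // fold() mutates and returns the receiver; the estimate then covers the union of both sets.
    double estimate = left.fold(right).estimateCardinality();
    System.out.println(estimate);
  }
}
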
Use of io.druid.java.util.common.ISE in project druid by druid-io.
From the class DeterminePartitionsJob, method run.
public boolean run() {
  try {
    if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
      throw new ISE("DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", config.getPartitionsSpec());
    }
    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      final Job groupByJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));
      JobHelper.injectSystemProperties(groupByJob);
      config.addJobProperties(groupByJob);
      groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
      groupByJob.setMapOutputKeyClass(BytesWritable.class);
      groupByJob.setMapOutputValueClass(NullWritable.class);
      groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setOutputKeyClass(BytesWritable.class);
      groupByJob.setOutputValueClass(NullWritable.class);
      groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
      JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);
      config.addInputPaths(groupByJob);
      config.intoConfiguration(groupByJob);
      FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
      groupByJob.submit();
      log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
      if (!groupByJob.waitForCompletion(true)) {
        log.error("Job failed: %s", groupByJob.getJobID());
        return false;
      }
    } else {
      log.info("Skipping group-by job.");
    }
    /*
     * Read grouped data and determine appropriate partitions.
     */
    final Job dimSelectionJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));
    dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
    JobHelper.injectSystemProperties(dimSelectionJob);
    config.addJobProperties(dimSelectionJob);
    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      // Read grouped data from the groupByJob.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
      dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
      FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
    } else {
      // Directly read the source data, since we assume it's already grouped.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
      config.addInputPaths(dimSelectionJob);
    }
    SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
    dimSelectionJob.setMapOutputValueClass(Text.class);
    dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
    dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
    dimSelectionJob.setOutputKeyClass(BytesWritable.class);
    dimSelectionJob.setOutputValueClass(Text.class);
    dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
    dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
    dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
    JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);
    config.intoConfiguration(dimSelectionJob);
    FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());
    dimSelectionJob.submit();
    log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());
    if (!dimSelectionJob.waitForCompletion(true)) {
      log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
      return false;
    }
    /*
     * Load partitions determined by the previous job.
     */
    log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
    FileSystem fileSystem = null;
    Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap();
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
      final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
      if (fileSystem == null) {
        fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
      }
      if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
        List<ShardSpec> specs = config.JSON_MAPPER.readValue(Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>() {
        });
        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (int i = 0; i < specs.size(); ++i) {
          actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
          log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
        }
        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
      } else {
        log.info("Path[%s] didn't exist!?", partitionInfoPath);
      }
    }
    config.setShardSpecs(shardSpecs);
    return true;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
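
The only ISE in this snippet is the fail-fast guard at the top of run(): the job refuses to proceed unless the partitions spec has the expected concrete type. A minimal sketch of that guard pattern, with hypothetical Spec and SingleDimSpec types standing in for Druid's classes:

import io.druid.java.util.common.ISE;

class PartitionsGuard {
  interface Spec {}

  static final class SingleDimSpec implements Spec {}

  static SingleDimSpec requireSingleDim(Spec spec) {
    if (!(spec instanceof SingleDimSpec)) {
      // %s renders the offending spec in the error, mirroring the check in DeterminePartitionsJob.run().
      throw new ISE("Expected SingleDimSpec, found [%s]", spec);
    }
    return (SingleDimSpec) spec;
  }
}
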