
Example 1 with ISE

Use of io.druid.java.util.common.ISE in project druid by druid-io.

The class KafkaLookupExtractorFactory, method start().

@Override
public boolean start() {
    synchronized (started) {
        if (started.get()) {
            LOG.warn("Already started, not starting again");
            return started.get();
        }
        if (executorService.isShutdown()) {
            LOG.warn("Already shut down, not starting again");
            return false;
        }
        final Properties kafkaProperties = new Properties();
        kafkaProperties.putAll(getKafkaProperties());
        if (kafkaProperties.containsKey("group.id")) {
            throw new IAE("Cannot set kafka property [group.id]. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty("group.id"));
        }
        if (kafkaProperties.containsKey("auto.offset.reset")) {
            throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
        }
        Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
        kafkaProperties.setProperty("group.id", factoryId);
        final String topic = getKafkaTopic();
        LOG.debug("About to listen to topic [%s] with group.id [%s]", topic, factoryId);
        cacheHandler = cacheManager.createCache();
        final Map<String, String> map = cacheHandler.getCache();
        mapRef.set(map);
        // Enable publish-subscribe
        kafkaProperties.setProperty("auto.offset.reset", "smallest");
        final CountDownLatch startingReads = new CountDownLatch(1);
        final ListenableFuture<?> future = executorService.submit(new Runnable() {

            @Override
            public void run() {
                while (!executorService.isShutdown()) {
                    consumerConnector = buildConnector(kafkaProperties);
                    try {
                        if (executorService.isShutdown()) {
                            break;
                        }
                        final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
                        if (streams == null || streams.isEmpty()) {
                            throw new IAE("Topic [%s] had no streams", topic);
                        }
                        if (streams.size() > 1) {
                            throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
                        }
                        final KafkaStream<String, String> kafkaStream = streams.get(0);
                        startingReads.countDown();
                        for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
                            final String key = messageAndMetadata.key();
                            final String message = messageAndMetadata.message();
                            if (key == null || message == null) {
                                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                                continue;
                            }
                            doubleEventCount.incrementAndGet();
                            map.put(key, message);
                            doubleEventCount.incrementAndGet();
                            LOG.trace("Placed key[%s] val[%s]", key, message);
                        }
                    } catch (Exception e) {
                        LOG.error(e, "Error reading stream for topic [%s]", topic);
                    } finally {
                        consumerConnector.shutdown();
                    }
                }
            }
        });
        Futures.addCallback(future, new FutureCallback<Object>() {

            @Override
            public void onSuccess(Object result) {
                LOG.debug("Success listening to [%s]", topic);
            }

            @Override
            public void onFailure(Throwable t) {
                if (t instanceof CancellationException) {
                    LOG.debug("Topic [%s] cancelled", topic);
                } else {
                    LOG.error(t, "Error in listening to [%s]", topic);
                }
            }
        }, MoreExecutors.sameThreadExecutor());
        this.future = future;
        final Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
                // Don't return until we have actually connected
                if (future.isDone()) {
                    future.get();
                } else {
                    if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
                        throw new TimeoutException("Failed to connect to kafka in sufficient time");
                    }
                }
            }
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            executorService.shutdown();
            if (!future.isDone() && !future.cancel(false)) {
                LOG.warn("Could not cancel kafka listening thread");
            }
            LOG.error(e, "Failed to start kafka extraction factory");
            cacheHandler.close();
            return false;
        }
        started.set(true);
        return true;
    }
}
Also used : MessageAndMetadata(kafka.message.MessageAndMetadata) Stopwatch(com.google.common.base.Stopwatch) KafkaStream(kafka.consumer.KafkaStream) Properties(java.util.Properties) IAE(io.druid.java.util.common.IAE) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) Whitelist(kafka.consumer.Whitelist) List(java.util.List) ISE(io.druid.java.util.common.ISE)
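
ISE and IAE are Druid's convenience subclasses of IllegalStateException and IllegalArgumentException that take a format string plus arguments, which is what the guard clauses above rely on. Below is a minimal sketch of the same fail-fast pattern in isolation, assuming only the (format, args) constructors that the example itself uses:

import io.druid.java.util.common.IAE;
import io.druid.java.util.common.ISE;

import java.util.Properties;

public class GuardExample {

    // Illustrative sketch only: the same fail-fast guard style used in start() above.
    static void validate(Properties kafkaProperties, int streamCount, String topic) {
        if (kafkaProperties.containsKey("group.id")) {
            // IAE extends IllegalArgumentException: bad input supplied by the caller
            throw new IAE("Cannot set kafka property [group.id]. Found [%s]", kafkaProperties.getProperty("group.id"));
        }
        if (streamCount != 1) {
            // ISE extends IllegalStateException: an internal invariant was violated
            throw new ISE("Topic [%s] has %d streams! expected 1", topic, streamCount);
        }
    }
}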

Example 2 with ISE

Use of io.druid.java.util.common.ISE in project druid by druid-io.

The class HdfsStorageAuthentication, method authenticate().

/**
   * Authenticates against a secured Hadoop cluster.
   * In case of any bug fix, make sure to fix the code in JobHelper#authenticate as well.
   */
@LifecycleStart
public void authenticate() {
    String principal = hdfsKerberosConfig.getPrincipal();
    String keytab = hdfsKerberosConfig.getKeytab();
    if (!Strings.isNullOrEmpty(principal) && !Strings.isNullOrEmpty(keytab)) {
        UserGroupInformation.setConfiguration(hadoopConf);
        if (UserGroupInformation.isSecurityEnabled()) {
            try {
                if (UserGroupInformation.getCurrentUser().hasKerberosCredentials() == false || !UserGroupInformation.getCurrentUser().getUserName().equals(principal)) {
                    log.info("Trying to authenticate user [%s] with keytab [%s]..", principal, keytab);
                    UserGroupInformation.loginUserFromKeytab(principal, keytab);
                }
            } catch (IOException e) {
                throw new ISE(e, "Failed to authenticate user principal [%s] with keytab [%s]", principal, keytab);
            }
        }
    }
}
Also used : ISE(io.druid.java.util.common.ISE) IOException(java.io.IOException) LifecycleStart(io.druid.java.util.common.lifecycle.LifecycleStart)
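
This example uses the cause-wrapping form of the constructor, ISE(Throwable, String, Object...), to turn a checked IOException into an unchecked exception with a formatted message. A minimal sketch of that pattern, with a hypothetical doLogin helper standing in for UserGroupInformation.loginUserFromKeytab:

import io.druid.java.util.common.ISE;

import java.io.IOException;

public class WrapCauseExample {

    // Illustrative sketch only: convert a checked IOException into an unchecked,
    // formatted ISE while keeping the original exception as the cause, as authenticate() does.
    static void login(String principal, String keytab) {
        try {
            doLogin(principal, keytab);
        } catch (IOException e) {
            // Assumes the ISE(Throwable, String, Object...) constructor used above.
            throw new ISE(e, "Failed to authenticate user principal [%s] with keytab [%s]", principal, keytab);
        }
    }

    // Hypothetical stand-in for UserGroupInformation.loginUserFromKeytab(principal, keytab).
    private static void doLogin(String principal, String keytab) throws IOException {
        // no-op in this sketch
    }
}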

Example 3 with ISE

Use of io.druid.java.util.common.ISE in project druid by druid-io.

The class JobHelper, method runJobs().

public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
    String failedMessage = null;
    for (Jobby job : jobs) {
        if (failedMessage == null) {
            if (!job.run()) {
                failedMessage = String.format("Job[%s] failed!", job.getClass());
            }
        }
    }
    if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
        if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
            Path workingPath = config.makeIntermediatePath();
            log.info("Deleting path[%s]", workingPath);
            try {
                workingPath.getFileSystem(injectSystemProperties(new Configuration())).delete(workingPath, true);
            } catch (IOException e) {
                log.error(e, "Failed to cleanup path[%s]", workingPath);
            }
        }
    }
    if (failedMessage != null) {
        throw new ISE(failedMessage);
    }
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ISE(io.druid.java.util.common.ISE) IOException(java.io.IOException)
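
runJobs() remembers the first failure, still performs intermediate-path cleanup (unless leaveIntermediate is configured), and only then throws the ISE. A minimal sketch of a job that could be passed to it, assuming Jobby is the single-method interface implied by the loop above:

import io.druid.indexer.Jobby;

public class RunJobsExample {

    // Illustrative sketch only: a trivial job for JobHelper.runJobs(...).
    static class NoopJob implements Jobby {
        @Override
        public boolean run() {
            // Returning false here would make runJobs() clean up the intermediate
            // path and then throw ISE("Job[...] failed!") after all jobs are processed.
            return true;
        }
    }
}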

Example 4 with ISE

Use of io.druid.java.util.common.ISE in project druid by druid-io.

The class HyperLogLogCollector, method fold().

public HyperLogLogCollector fold(HyperLogLogCollector other) {
    if (other == null || other.storageBuffer.remaining() == 0) {
        return this;
    }
    if (storageBuffer.isReadOnly()) {
        convertToMutableByteBuffer();
    }
    if (storageBuffer.remaining() != getNumBytesForDenseStorage()) {
        convertToDenseStorage();
    }
    estimatedCardinality = null;
    if (getRegisterOffset() < other.getRegisterOffset()) {
        // "Swap" the buffers so that we are folding into the one with the higher offset
        final ByteBuffer tmpBuffer = ByteBuffer.allocate(storageBuffer.remaining());
        tmpBuffer.put(storageBuffer.asReadOnlyBuffer());
        tmpBuffer.clear();
        storageBuffer.duplicate().put(other.storageBuffer.asReadOnlyBuffer());
        other = HyperLogLogCollector.makeCollector(tmpBuffer);
    }
    final ByteBuffer otherBuffer = other.storageBuffer;
    // Save position and restore later to avoid allocations due to duplicating the otherBuffer object.
    final int otherPosition = otherBuffer.position();
    try {
        final byte otherOffset = other.getRegisterOffset();
        byte myOffset = getRegisterOffset();
        short numNonZero = getNumNonZeroRegisters();
        final int offsetDiff = myOffset - otherOffset;
        if (offsetDiff < 0) {
            throw new ISE("offsetDiff[%d] < 0, shouldn't happen because of swap.", offsetDiff);
        }
        final int myPayloadStart = getPayloadBytePosition();
        otherBuffer.position(other.getPayloadBytePosition());
        if (isSparse(otherBuffer)) {
            while (otherBuffer.hasRemaining()) {
                final int payloadStartPosition = otherBuffer.getShort() - other.getNumHeaderBytes();
                numNonZero += mergeAndStoreByteRegister(storageBuffer, myPayloadStart + payloadStartPosition, offsetDiff, otherBuffer.get());
            }
            if (numNonZero == NUM_BUCKETS) {
                numNonZero = decrementBuckets();
                setRegisterOffset(++myOffset);
                setNumNonZeroRegisters(numNonZero);
            }
        } else {
            // dense
            int position = getPayloadBytePosition();
            while (otherBuffer.hasRemaining()) {
                numNonZero += mergeAndStoreByteRegister(storageBuffer, position, offsetDiff, otherBuffer.get());
                position++;
            }
            if (numNonZero == NUM_BUCKETS) {
                numNonZero = decrementBuckets();
                setRegisterOffset(++myOffset);
                setNumNonZeroRegisters(numNonZero);
            }
        }
        // no need to call setRegisterOffset(myOffset) here, since it gets updated every time myOffset is incremented
        setNumNonZeroRegisters(numNonZero);
        // this will add the max overflow and also recheck if offset needs to be shifted
        add(other.getMaxOverflowRegister(), other.getMaxOverflowValue());
        return this;
    } finally {
        otherBuffer.position(otherPosition);
    }
}
Also used : ISE(io.druid.java.util.common.ISE) ByteBuffer(java.nio.ByteBuffer)
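
The ISE here protects an invariant rather than validating input: after the buffer swap, the collector being folded into always has a register offset greater than or equal to the other one, so offsetDiff should never be negative. A minimal sketch of the normal calling pattern, assuming the usual HyperLogLogCollector factory and add/fold/estimate methods (the import path varies between Druid versions; later releases use io.druid.hll):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;

import java.nio.charset.StandardCharsets;

public class FoldExample {

    private static final HashFunction HASH_FN = Hashing.murmur3_128();

    // Illustrative sketch only: fold one collector into another and read the estimate.
    static double foldedEstimate() {
        HyperLogLogCollector a = HyperLogLogCollector.makeLatestCollector();
        HyperLogLogCollector b = HyperLogLogCollector.makeLatestCollector();
        a.add(HASH_FN.hashString("alpha", StandardCharsets.UTF_8).asBytes());
        b.add(HASH_FN.hashString("beta", StandardCharsets.UTF_8).asBytes());
        // fold() performs the buffer swap internally, so the ISE in the method above
        // only fires if the "higher register offset on the left" invariant is broken.
        a.fold(b);
        return a.estimateCardinality();
    }
}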

Example 5 with ISE

Use of io.druid.java.util.common.ISE in project druid by druid-io.

The class DeterminePartitionsJob, method run().

public boolean run() {
    try {
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE("DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", config.getPartitionsSpec());
        }
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));
            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);
            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }
        /*
       * Read grouped data and determine appropriate partitions.
       */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));
        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }
        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);
        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());
        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());
        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }
        /*
       * Load partitions determined by the previous job.
       */
        log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>() {
                });
                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }
                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);
        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SingleDimensionPartitionsSpec(io.druid.indexer.partitions.SingleDimensionPartitionsSpec) SingleDimensionShardSpec(io.druid.timeline.partition.SingleDimensionShardSpec) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) ISE(io.druid.java.util.common.ISE) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) Interval(org.joda.time.Interval)
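
Besides the up-front ISE for an unsupported partitionsSpec, the catch block relies on Guava's Throwables.propagate to rethrow any checked exception from the Hadoop jobs as an unchecked one. A minimal sketch of that idiom, with a hypothetical mightThrow helper standing in for the job submission:

import com.google.common.base.Throwables;

import java.io.IOException;

public class PropagateExample {

    // Illustrative sketch only: the Throwables.propagate(e) idiom from the catch block above.
    // It rethrows RuntimeExceptions and Errors as-is and wraps checked exceptions
    // (such as the Hadoop IOExceptions here) in a RuntimeException.
    static boolean runQuietly() {
        try {
            mightThrow();
            return true;
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    // Hypothetical stand-in for the Hadoop job submission done in run().
    private static void mightThrow() throws IOException {
        // no-op in this sketch
    }
}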

Aggregations

ISE (io.druid.java.util.common.ISE): 158
IOException (java.io.IOException): 37
Map (java.util.Map): 23
Test (org.junit.Test): 21
File (java.io.File): 20
List (java.util.List): 19
DateTime (org.joda.time.DateTime): 18
ArrayList (java.util.ArrayList): 17
DataSegment (io.druid.timeline.DataSegment): 15
Interval (org.joda.time.Interval): 15
Function (com.google.common.base.Function): 14
TimeoutException (java.util.concurrent.TimeoutException): 12
IAE (io.druid.java.util.common.IAE): 10
HashMap (java.util.HashMap): 10
ExecutionException (java.util.concurrent.ExecutionException): 10
Stopwatch (com.google.common.base.Stopwatch): 9
DimensionSpec (io.druid.query.dimension.DimensionSpec): 9
ImmutableMap (com.google.common.collect.ImmutableMap): 8
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 8
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 8