Search in sources :

Example 1 with ConnectionLifeCycleListener

use of twitter4j.ConnectionLifeCycleListener in project druid by druid-io.

the class TwitterSpritzerFirehoseFactory method connect.

@Override
public Firehose connect(InputRowParser parser) throws IOException {
    final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {

        @Override
        public void onConnect() {
            log.info("Connected_to_Twitter");
        }

        @Override
        public void onDisconnect() {
            log.info("Disconnect_from_Twitter");
        }

        /**
       * called before thread gets cleaned up
       */
        @Override
        public void onCleanUp() {
            log.info("Cleanup_twitter_stream");
        }
    };
    // ConnectionLifeCycleListener
    final TwitterStream twitterStream;
    final StatusListener statusListener;
    final int QUEUE_SIZE = 2000;
    /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread.   */
    final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
    final long startMsec = System.currentTimeMillis();
    //
    //   set up Twitter Spritzer
    //
    twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
    statusListener = new StatusListener() {

        // This is what really gets called to deliver stuff from twitter4j
        @Override
        public void onStatus(Status status) {
            // time to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            try {
                boolean success = queue.offer(status, 15L, TimeUnit.SECONDS);
                if (!success) {
                    log.warn("queue too slow!");
                }
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
        //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            // This notice will be sent each time a limited stream becomes unlimited.
            // If this number is high and or rapidly increasing, it is an indication that your predicate is too broad, and you should consider a predicate with higher selectivity.
            log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
        //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }
    };
    twitterStream.addListener(statusListener);
    // creates a generic StatusStream
    twitterStream.sample();
    log.info("returned from sample()");
    return new Firehose() {

        private final Runnable doNothingRunnable = new Runnable() {

            public void run() {
            }
        };

        private long rowCount = 0L;

        private boolean waitIfmax = (getMaxEventCount() < 0L);

        private final Map<String, Object> theMap = new TreeMap<>();

        // DIY json parsing // private final ObjectMapper omapper = new ObjectMapper();
        private boolean maxTimeReached() {
            if (getMaxRunMinutes() <= 0) {
                return false;
            } else {
                return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes();
            }
        }

        private boolean maxCountReached() {
            return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount();
        }

        @Override
        public boolean hasMore() {
            if (maxCountReached() || maxTimeReached()) {
                return waitIfmax;
            } else {
                return true;
            }
        }

        @Override
        public InputRow nextRow() {
            // Interrupted to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            // all done?
            if (maxCountReached() || maxTimeReached()) {
                if (waitIfmax) {
                    // sleep a long time instead of terminating
                    try {
                        log.info("reached limit, sleeping a long time...");
                        Thread.sleep(2000000000L);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("InterruptedException", e);
                    }
                } else {
                // allow this event through, and the next hasMore() call will be false
                }
            }
            if (++rowCount % 1000 == 0) {
                log.info("nextRow() has returned %,d InputRows", rowCount);
            }
            Status status;
            try {
                status = queue.take();
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
            theMap.clear();
            HashtagEntity[] hts = status.getHashtagEntities();
            String text = status.getText();
            theMap.put("text", (null == text) ? "" : text);
            theMap.put("htags", (hts.length > 0) ? Lists.transform(Arrays.asList(hts), new Function<HashtagEntity, String>() {

                @Nullable
                @Override
                public String apply(HashtagEntity input) {
                    return input.getText();
                }
            }) : ImmutableList.<String>of());
            long[] lcontrobutors = status.getContributors();
            List<String> contributors = new ArrayList<>();
            for (long contrib : lcontrobutors) {
                contributors.add(String.format("%d", contrib));
            }
            theMap.put("contributors", contributors);
            GeoLocation geoLocation = status.getGeoLocation();
            if (null != geoLocation) {
                double lat = status.getGeoLocation().getLatitude();
                double lon = status.getGeoLocation().getLongitude();
                theMap.put("lat", lat);
                theMap.put("lon", lon);
            } else {
                theMap.put("lat", null);
                theMap.put("lon", null);
            }
            if (status.getSource() != null) {
                Matcher m = sourcePattern.matcher(status.getSource());
                theMap.put("source", m.find() ? m.group(1) : status.getSource());
            }
            theMap.put("retweet", status.isRetweet());
            if (status.isRetweet()) {
                Status original = status.getRetweetedStatus();
                theMap.put("retweet_count", original.getRetweetCount());
                User originator = original.getUser();
                theMap.put("originator_screen_name", originator != null ? originator.getScreenName() : "");
                theMap.put("originator_follower_count", originator != null ? originator.getFollowersCount() : "");
                theMap.put("originator_friends_count", originator != null ? originator.getFriendsCount() : "");
                theMap.put("originator_verified", originator != null ? originator.isVerified() : "");
            }
            User user = status.getUser();
            final boolean hasUser = (null != user);
            theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0);
            theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0);
            theMap.put("lang", hasUser ? user.getLang() : "");
            // resolution in seconds, -1 if not available?
            theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1);
            theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0);
            theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : "");
            theMap.put("screen_name", hasUser ? user.getScreenName() : "");
            theMap.put("location", hasUser ? user.getLocation() : "");
            theMap.put("verified", hasUser ? user.isVerified() : "");
            theMap.put("ts", status.getCreatedAt().getTime());
            List<String> dimensions = Lists.newArrayList(theMap.keySet());
            return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
        }

        @Override
        public Runnable commit() {
            // reuse the same object each time
            return doNothingRunnable;
        }

        @Override
        public void close() throws IOException {
            log.info("CLOSE twitterstream");
            // invokes twitterStream.cleanUp()
            twitterStream.shutdown();
        }
    };
}
Also used : User(twitter4j.User) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) TwitterStreamFactory(twitter4j.TwitterStreamFactory) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) StallWarning(twitter4j.StallWarning) ConnectionLifeCycleListener(twitter4j.ConnectionLifeCycleListener) HashtagEntity(twitter4j.HashtagEntity) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Status(twitter4j.Status) Firehose(io.druid.data.input.Firehose) IOException(java.io.IOException) TwitterStream(twitter4j.TwitterStream) StatusListener(twitter4j.StatusListener) StatusDeletionNotice(twitter4j.StatusDeletionNotice) GeoLocation(twitter4j.GeoLocation) Map(java.util.Map) TreeMap(java.util.TreeMap) Nullable(javax.annotation.Nullable)

Aggregations

Firehose (io.druid.data.input.Firehose)1 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)1 Matcher (java.util.regex.Matcher)1 Nullable (javax.annotation.Nullable)1 ConnectionLifeCycleListener (twitter4j.ConnectionLifeCycleListener)1 GeoLocation (twitter4j.GeoLocation)1 HashtagEntity (twitter4j.HashtagEntity)1 StallWarning (twitter4j.StallWarning)1 Status (twitter4j.Status)1 StatusDeletionNotice (twitter4j.StatusDeletionNotice)1 StatusListener (twitter4j.StatusListener)1 TwitterStream (twitter4j.TwitterStream)1 TwitterStreamFactory (twitter4j.TwitterStreamFactory)1 User (twitter4j.User)1