Search in sources :

Example 1 with RawStreamListener

use of twitter4j.RawStreamListener in project twitter4j by yusuke.

In the class PrintRawSampleStream, method main.

/**
 * Entry point of the example: registers a raw listener on a {@code TwitterStream}
 * and starts sampling. Every raw message is echoed to standard output and every
 * reported exception has its stack trace printed.
 *
 * @param args command-line arguments; this example never reads them
 * @throws TwitterException when Twitter service or network is unavailable
 */
public static void main(String[] args) throws TwitterException {
    final TwitterStream stream = new TwitterStreamFactory().getInstance();
    stream.addListener(new RawStreamListener() {

        @Override
        public void onException(Exception failure) {
            failure.printStackTrace();
        }

        @Override
        public void onMessage(String payload) {
            // The payload is printed verbatim, without any parsing.
            System.out.println(payload);
        }
    });
    stream.sample();
}
Also used : TwitterStreamFactory(twitter4j.TwitterStreamFactory) TwitterStream(twitter4j.TwitterStream) RawStreamListener(twitter4j.RawStreamListener) TwitterException(twitter4j.TwitterException)

Example 2 with RawStreamListener

use of twitter4j.RawStreamListener in project Anserini by castorini.

In the class TweetStreamIndexer, method run.

/**
 * Resets the tweet counter, then listens to the Twitter sample stream and mirrors it
 * into {@code TweetSearcher.indexWriter}.
 *
 * <p>For every raw message:
 * <ul>
 *   <li>if it does not parse as a {@code Status}, it is inspected for a {@code "delete"}
 *       notice and, when present, the document with the matching id is deleted;</li>
 *   <li>if it parses but carries no text, it is ignored;</li>
 *   <li>otherwise a Lucene document is built from the status and added to the index.</li>
 * </ul>
 *
 * <p>Failures while handling a single message are logged and do not stop the stream.
 */
@Override
public void run() {
    tweetCount = 0;
    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    RawStreamListener rawListener = new RawStreamListener() {

        @Override
        public void onMessage(String rawString) {
            Status status = Status.fromJson(rawString);
            if (status == null) {
                // Not a regular status: the only other message acted upon is a deletion notice.
                try {
                    JsonObject obj = (JsonObject) JSON_PARSER.parse(rawString);
                    if (obj.has("delete")) {
                        long id = obj.getAsJsonObject("delete").getAsJsonObject("status").get("id").getAsLong();
                        Query q = LongPoint.newRangeQuery(StatusField.ID.name, id, id);
                        TweetSearcher.indexWriter.deleteDocuments(q);
                    }
                } catch (Exception e) {
                    // Best effort: a malformed or unexpected message must not kill the listener.
                    LOG.error("Failed to process non-status stream message", e);
                }
                return;
            }
            if (status.getText() == null) {
                return;
            }
            Document doc = new Document();
            // Numeric values are added twice: as a point (searchable) and as a stored field (retrievable).
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));
            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));
            // Each counter goes into the field of the same name (they were previously swapped).
            doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFriendsCount()));
            doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount()));
            doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount()));
            doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount()));
            doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));
            doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));
            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
            }
            // A missing language is treated like "unknown" instead of raising a NullPointerException.
            String lang = status.getLang();
            if (lang != null && !lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, lang, Store.YES));
            }
            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    System.err.println("Error parsing retweet fields of " + status.getId());
                }
            }
            try {
                TweetSearcher.indexWriter.addDocument(doc);
                tweetCount++;
                if (tweetCount % 1000 == 0) {
                    LOG.info(tweetCount + " statuses indexed");
                }
            } catch (IOException e) {
                LOG.error("Failed to index status " + status.getId(), e);
            }
        }

        @Override
        public void onException(Exception e) {
            LOG.error("Twitter stream reported an exception", e);
        }
    };
    twitterStream.addListener(rawListener);
    twitterStream.sample();
}
Also used : Status(io.anserini.document.twitter.Status) RawStreamListener(twitter4j.RawStreamListener) Query(org.apache.lucene.search.Query) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException) TwitterStreamFactory(twitter4j.TwitterStreamFactory) TwitterStream(twitter4j.TwitterStream)

Example 3 with RawStreamListener

use of twitter4j.RawStreamListener in project Anserini by castorini.

In the class TRECIndexerRunnable, method run.

/**
 * Listens to the Twitter sample stream and indexes qualifying tweets into {@code indexWriter}.
 *
 * <p>For every raw message:
 * <ul>
 *   <li>a status that embeds a retweeted status is replaced by that embedded status;</li>
 *   <li>a message that does not parse as a status is inspected for a {@code "delete"} notice
 *       and, when present, the document with the matching id is deleted;</li>
 *   <li>statuses without text, whose text starts with {@code "RT @"}, whose language is not
 *       {@code "en"}, or whose tokenized text is empty are skipped;</li>
 *   <li>everything else is added to the index and committed.</li>
 * </ul>
 *
 * <p>Failures while handling a single message are logged and do not stop the stream.
 */
@Override
public void run() {
    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    twitterStream = new TwitterStreamFactory().getInstance();
    RawStreamListener rawListener = new RawStreamListener() {

        @Override
        public void onMessage(String rawString) {
            Status status = Status.fromJson(rawString);
            // TREC 2016 rule: Treatment of retweets.
            // The null guard matters: messages that are not statuses must fall through to the
            // deletion handling below instead of failing with a NullPointerException here.
            if (status != null && status.getRetweetStatusString() != null) {
                status = Status.fromJson(status.getRetweetStatusString());
            }
            if (status == null) {
                try {
                    JsonObject obj = (JsonObject) JSON_PARSER.parse(rawString);
                    // Tweet deletion update: delete from the existing index.
                    if (obj.has("delete")) {
                        long id = obj.getAsJsonObject("delete").getAsJsonObject("status").get("id").getAsLong();
                        Query q = LongPoint.newRangeQuery(StatusField.ID.name, id, id);
                        indexWriter.deleteDocuments(q);
                    }
                } catch (Exception e) {
                    // Best effort: a malformed or unexpected message must not kill the listener.
                    LOG.error("Failed to process non-status stream message", e);
                }
                return;
            }
            String rawText = status.getText();
            if (rawText == null) {
                return;
            }
            // Filter out retweets; startsWith is safe for texts shorter than four characters.
            if (rawText.startsWith("RT @")) {
                return;
            }
            // Keep English tweets only; a missing language counts as non-English.
            if (!"en".equals(status.getLang())) {
                return;
            }
            // Strip every non-ASCII character before tokenizing.
            String processedRawText = rawText.replaceAll("[^\\x00-\\x7F]", "");
            String whiteSpaceTokenizedText = TRECTwokenizer.trecTokenizeText(processedRawText);
            // Compare by content, not by reference, so empty results are actually skipped.
            if (whiteSpaceTokenizedText == null || whiteSpaceTokenizedText.isEmpty()) {
                return;
            }
            Document doc = new Document();
            // Numeric values are added twice: as a point (searchable) and as a stored field (retrievable).
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));
            doc.add(new TextField(StatusField.NAME.name, status.getName(), Store.YES));
            doc.add(new TextField(StatusField.PROFILE_IMAGE_URL.name, status.getProfileImageURL(), Store.YES));
            doc.add(new Field(StatusField.TEXT.name, whiteSpaceTokenizedText, textOptions));
            doc.add(new TextField(StatusField.RAW_TEXT.name, rawText, Store.YES));
            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    System.err.println("Error parsing retweet fields of " + status.getId());
                }
            }
            try {
                indexWriter.addDocument(doc);
                // NOTE(review): committing after every document is costly; presumably intentional
                // so that new tweets become visible right away - confirm before batching.
                indexWriter.commit();
                tweetCount++;
                if (tweetCount % 1000 == 0) {
                    LOG.info(tweetCount + " statuses indexed");
                }
            } catch (IOException e) {
                LOG.error("Failed to index status " + status.getId(), e);
            }
        }

        @Override
        public void onException(Exception e) {
            LOG.error("Twitter stream reported an exception", e);
        }
    };
    twitterStream.addListener(rawListener);
    twitterStream.sample();
}
Also used : Status(io.anserini.document.twitter.Status) RawStreamListener(twitter4j.RawStreamListener) Query(org.apache.lucene.search.Query) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException) TwitterStreamFactory(twitter4j.TwitterStreamFactory)

Aggregations

RawStreamListener (twitter4j.RawStreamListener)3 TwitterStreamFactory (twitter4j.TwitterStreamFactory)3 JsonObject (com.google.gson.JsonObject)2 Status (io.anserini.document.twitter.Status)2 IOException (java.io.IOException)2 Query (org.apache.lucene.search.Query)2 TwitterStream (twitter4j.TwitterStream)2 TwitterException (twitter4j.TwitterException)1