Use of twitter4j.RawStreamListener in project twitter4j by yusuke.
Class PrintRawSampleStream, method main.
/**
 * Entry point: prints every raw message received from the sample stream to standard output.
 * Exceptions reported to the listener are dumped as stack traces.
 *
 * @param args command-line arguments; this example does not read them
 * @throws TwitterException when Twitter service or network is unavailable
 */
public static void main(String[] args) throws TwitterException {
    // The listener has no dependency on the stream, so build it first.
    final RawStreamListener rawPrinter = new RawStreamListener() {
        @Override
        public void onException(Exception failure) {
            failure.printStackTrace();
        }

        @Override
        public void onMessage(String payload) {
            System.out.println(payload);
        }
    };

    final TwitterStream stream = new TwitterStreamFactory().getInstance();
    stream.addListener(rawPrinter);
    stream.sample();
}
Use of twitter4j.RawStreamListener in project Anserini by castorini.
Class TweetStreamIndexer, method run.
/**
 * Subscribes to the Twitter sample stream and mirrors it into
 * {@code TweetSearcher.indexWriter}.
 *
 * <p>Each raw message is parsed with {@code Status.fromJson}. Messages that do not parse
 * into a status are inspected for a {@code "delete"} notice, in which case the referenced
 * tweet is removed from the index. Parsed statuses that carry text are converted into a
 * Lucene document and added to the index. Progress is logged every 1000 indexed statuses.
 */
@Override
public void run() {
    tweetCount = 0;

    // Tweet text is stored, tokenized and indexed with term frequencies and positions.
    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    RawStreamListener rawListener = new RawStreamListener() {
        @Override
        public void onMessage(String rawString) {
            Status status = Status.fromJson(rawString);
            if (status == null) {
                // Not a regular status: the only other message acted upon is a deletion notice.
                try {
                    JsonObject obj = (JsonObject) JSON_PARSER.parse(rawString);
                    if (obj.has("delete")) {
                        long id = obj.getAsJsonObject("delete").getAsJsonObject("status").get("id").getAsLong();
                        Query q = LongPoint.newRangeQuery(StatusField.ID.name, id, id);
                        TweetSearcher.indexWriter.deleteDocuments(q);
                    }
                } catch (Exception e) {
                    // Best effort: a malformed or unexpected message must not stop the stream.
                    e.printStackTrace();
                }
                return;
            }
            if (status.getText() == null) {
                return;
            }

            // Every numeric field is added twice: as a point (searchable) and as a stored value.
            Document doc = new Document();
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));
            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));
            // Fixed: friends/followers counts were previously written to each other's field.
            doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFriendsCount()));
            doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount()));
            doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount()));
            doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount()));
            doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));
            doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
            }

            // Null-safe: a status without a language is treated like an "unknown" one.
            String lang = status.getLang();
            if (lang != null && !"unknown".equals(lang)) {
                doc.add(new TextField(StatusField.LANG.name, lang, Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                // The retweeted status id is known to be positive here, so only the count can be bad.
                if (status.getRetweetCount() < 0) {
                    System.err.println("Error parsing retweet fields of " + status.getId());
                }
            }

            try {
                TweetSearcher.indexWriter.addDocument(doc);
                tweetCount++;
                if (tweetCount % 1000 == 0) {
                    LOG.info(tweetCount + " statuses indexed");
                }
            } catch (IOException e) {
                // Report and keep consuming the stream; one failed write should not end indexing.
                e.printStackTrace();
            }
        }

        @Override
        public void onException(Exception e) {
            e.printStackTrace();
        }
    };
    twitterStream.addListener(rawListener);
    twitterStream.sample();
}
Use of twitter4j.RawStreamListener in project Anserini by castorini.
Class TRECIndexerRunnable, method run.
/**
 * Subscribes to the Twitter sample stream and indexes English, non-retweet statuses into
 * {@code indexWriter}, committing after every added document.
 *
 * <p>Each raw message is parsed with {@code Status.fromJson}. If the parsed status embeds a
 * retweeted status, that embedded status is indexed instead. Messages that do not parse
 * into a status are inspected for a {@code "delete"} notice, in which case the referenced
 * tweet is removed from the index. Progress is logged every 1000 indexed statuses.
 */
@Override
public void run() {
    // Tweet text is stored, tokenized and indexed with term frequencies and positions.
    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);

    twitterStream = new TwitterStreamFactory().getInstance();
    RawStreamListener rawListener = new RawStreamListener() {
        @Override
        public void onMessage(String rawString) {
            Status status = Status.fromJson(rawString);
            // TREC 2016 rule: Treatment of retweets.
            // Guarded against null so that non-status messages (e.g. deletion notices)
            // reach the branch below instead of failing with a NullPointerException.
            if (status != null && status.getRetweetStatusString() != null) {
                status = Status.fromJson(status.getRetweetStatusString());
            }
            if (status == null) {
                try {
                    JsonObject obj = (JsonObject) JSON_PARSER.parse(rawString);
                    // Tweet deletion update: delete from the existing index
                    if (obj.has("delete")) {
                        long id = obj.getAsJsonObject("delete").getAsJsonObject("status").get("id").getAsLong();
                        Query q = LongPoint.newRangeQuery(StatusField.ID.name, id, id);
                        indexWriter.deleteDocuments(q);
                    }
                } catch (Exception e) {
                    // Best effort: a malformed or unexpected message must not stop the stream.
                    e.printStackTrace();
                }
                return;
            }

            String rawText = status.getText();
            if (rawText == null) {
                return;
            }
            // Filter out retweets; startsWith is safe for texts shorter than four characters.
            if (rawText.startsWith("RT @")) {
                return;
            }
            // Keep English tweets only; a missing language is treated as non-English.
            if (!"en".equals(status.getLang())) {
                return;
            }

            // Strip every non-ASCII character, then tokenize into whitespace-separated text.
            String processedRawText = rawText.replaceAll("[^\\x00-\\x7F]", "");
            String whiteSpaceTokenizedText = TRECTwokenizer.trecTokenizeText(processedRawText);
            if (whiteSpaceTokenizedText == null || whiteSpaceTokenizedText.isEmpty()) {
                return;
            }

            Document doc = new Document();
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));
            doc.add(new TextField(StatusField.NAME.name, status.getName(), Store.YES));
            doc.add(new TextField(StatusField.PROFILE_IMAGE_URL.name, status.getProfileImageURL(), Store.YES));
            doc.add(new Field(StatusField.TEXT.name, whiteSpaceTokenizedText, textOptions));
            doc.add(new TextField(StatusField.RAW_TEXT.name, rawText, Store.YES));

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                // The retweeted status id is known to be positive here, so only the count can be bad.
                if (status.getRetweetCount() < 0) {
                    System.err.println("Error parsing retweet fields of " + status.getId());
                }
            }

            try {
                indexWriter.addDocument(doc);
                indexWriter.commit();
                tweetCount++;
                if (tweetCount % 1000 == 0) {
                    LOG.info(tweetCount + " statuses indexed");
                }
            } catch (IOException e) {
                // Report and keep consuming the stream; one failed write should not end indexing.
                e.printStackTrace();
            }
        }

        @Override
        public void onException(Exception e) {
            e.printStackTrace();
        }
    };
    twitterStream.addListener(rawListener);
    twitterStream.sample();
}
Aggregations