Use of org.opensextant.data.TextInput in the Xponents project by OpenSextant.
Class TweetGeocoder, method geocodeTweetUser.
/**
 * Geocodes the location information attached to a tweet's author and, if
 * anything was found, writes the result to the user output.
 *
 * <pre>
 * If user loc.xy:
 *     extract coordinates from (xy)
 * else if user loc:
 *     extract coordinates from (user loc)
 *
 * If no coordinate was matched and user loc is present:
 *     geocode (user loc) as a place name
 *
 * write out () only when at least one match exists
 * </pre>
 *
 * The method returns without writing anything when the tweet carries neither
 * an author XY value nor an author location string, or when no match is found.
 *
 * @param tw the tweet whose author location fields are examined
 */
public void geocodeTweetUser(Tweet tw) {
    // Skip only when BOTH location sources are missing. The previous "||"
    // bailed out whenever either one was missing, which made the
    // author_location-only path below unreachable.
    if (tw.author_xy_val == null && tw.author_location == null) {
        return;
    }

    ExtractionResult res = new ExtractionResult(tw.id);
    res.addAttribute("timestamp", tw.pub_date);
    res.addAttribute("author", tw.author);
    res.addAttribute("tweet", tw.getText());

    /*
     * If User profile location or geo coord is a Coordinate... parse and add to matched locations.
     * The explicit XY value takes precedence; the guard above guarantees that
     * author_location is non-null whenever author_xy_val is null.
     */
    if (tw.author_xy_val != null) {
        res.matches = userlocX.extract(new TextInput(tw.id, tw.author_xy_val));
    } else {
        res.matches = userlocX.extract(new TextInput(tw.id, tw.author_location));
    }

    /*
     * If User profile is a place name, attempt to match it and disambiguate.
     * Only attempted when there is a location string to work with; a tweet
     * that has just an XY value has nothing to hand to the place-name geocoder.
     */
    if (res.matches.isEmpty() && tw.author_location != null) {
        try {
            res.matches = geocoder.extract(new TextInput(tw.id, tw.author_location));
        } catch (Exception userErr) {
            // Best effort: log and fall through; res.matches is still the empty result.
            log.error("Geocoding error with Users?", userErr);
        }
    }

    // Nothing matched by either route: do not emit an empty record.
    if (res.matches.isEmpty()) {
        return;
    }
    userOutput.writeGeocodingResult(res);
}
Use of org.opensextant.data.TextInput in the Xponents project by OpenSextant.
Class TweetGeocoder, method geocodeTweet.
/**
 * Runs the geocoder over a tweet's status text and writes the result to the
 * tweet output. Tweets whose text is null or empty are counted but not
 * processed.
 *
 * Every call increments the record counter; each time the counter reaches a
 * multiple of {@code batch}, the row number is logged and the geocoder is
 * asked to report memory.
 *
 * @param tw the tweet to process
 */
public void geocodeTweet(Tweet tw) {
    recordCount++;

    final boolean hasText = tw.getText() != null && !tw.getText().isEmpty();
    if (hasText) {
        try {
            ExtractionResult result = new ExtractionResult(tw.id);
            // Place name tagger may not work if content has mostly lower case proper names.!!!! TODO: allow mixed case;
            TextInput statusText = new TextInput(tw.id, tw.getText());
            result.matches = geocoder.extract(statusText);

            result.addAttribute("timestamp", tw.pub_date);
            result.addAttribute("tweet", tw.getText());
            result.addAttribute("author", tw.author);

            enrichResults(result.matches);
            tweetOutput.writeGeocodingResult(result);
        } catch (Exception failure) {
            // A failure on one tweet is logged and does not stop the run.
            log.error("Geocoding error?", failure);
        }
    }

    // Periodic progress report, independent of whether this tweet had text.
    final boolean atBatchBoundary = recordCount % batch == 0 && recordCount > 0;
    if (atBatchBoundary) {
        log.info("ROW #" + recordCount);
        geocoder.reportMemory();
    }
}
Use of org.opensextant.data.TextInput in the Xponents project by OpenSextant.
Class TaggerResource, method processForm.
/**
 * Handles a JSON POST request carrying a single document to process.
 *
 * Fields read from the posted JSON object:
 * <ul>
 * <li>{@code text}  - the text to process; required by this method</li>
 * <li>{@code docid} - caller-provided document ID; optional</li>
 * <li>{@code lang}  - language ID assigned to the input; optional</li>
 * </ul>
 *
 * When {@code text} is present, it is wrapped in a {@link TextInput}, the
 * request parameters are derived from the same JSON object, and the outcome
 * of {@code process(...)} is returned. When {@code text} is absent, a "FAIL"
 * status representation is returned.
 *
 * NOTE(review): the failure message also mentions a {@code doc-list} form,
 * and the API contract describes {@code cmd=ping}; neither is handled in this
 * method body. Confirm whether they are served elsewhere.
 *
 * @param params
 *            the posted JSON representation
 * @return the processing result, or a "FAIL" status when no text was supplied
 * @throws JSONException
 *             the JSON exception
 */
@Post("application/json;charset=utf-8")
public Representation processForm(JsonRepresentation params) throws JSONException {
    org.json.JSONObject request = params.getJsonObject();
    String text = request.optString("text", null);
    String docid = request.optString("docid", null);

    // Guard clause: without text there is nothing to process.
    if (text == null) {
        return status("FAIL", "Invalid API use text+docid pair or doc-list was not found");
    }

    String lang = request.optString("lang", null);
    TextInput document = new TextInput(docid, text);
    document.langid = lang;
    RequestParameters job = fromRequest(request);
    return process(document, job);
}
Use of org.opensextant.data.TextInput in the Xponents project by OpenSextant.
Class TaxonMatcher, method configure.
/**
 * Initializes this matcher and then runs one extraction over a short
 * throwaway input, so that any failure in either step surfaces at
 * configuration time.
 *
 * @throws ConfigException if initialization or the priming extraction fails;
 *         the underlying exception is attached as the cause
 */
@Override
public void configure() throws ConfigException {
    try {
        initialize();
        // Priming call: the input is a placeholder and its result is discarded.
        final TextInput primer = new TextInput("__initialization___", "trivial priming of the solr pump");
        extract(primer);
    } catch (Exception cause) {
        throw new ConfigException("Failed to configure TaxMatcher", cause);
    }
}
Use of org.opensextant.data.TextInput in the Xponents project by OpenSextant.
Class TestPlaceGeocoder, method tagFile.
/**
 * Reads a file as UTF-8, runs the geocoder over its content with the given
 * language ID, and passes the matches to {@code summarizeFindings}.
 * Call once per document.
 *
 * Extraction or summary failures are printed as a stack trace and do not
 * propagate; only a failure to read the file is reported to the caller.
 *
 * @param f      the file whose content is tagged
 * @param langid language ID assigned to the input
 * @throws IOException if the file cannot be read
 */
public void tagFile(File f, String langid) throws IOException {
    // Reading happens outside the try block so I/O errors reach the caller.
    final String content = FileUtility.readFile(f, "UTF-8");
    TextInput document = new TextInput("test", content);
    document.langid = langid;

    try {
        List<TextMatch> found = geocoder.extract(document);
        summarizeFindings(found);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
Aggregations