use of org.opensextant.extractors.geo.PlaceGeocoder in project Xponents by OpenSextant.
the class XponentsGeotagger method process.
/**
 * Runs the configured extractor over a single plain-text input and formats the result.
 *
 * <p>An input that is null or has no buffer is rejected with a "FAIL" status before the
 * request counter is incremented. When {@code prodMode} is off, no extraction is attempted
 * and a "TEST" status is returned instead.
 *
 * @param input the text input to process; null, or a null buffer, yields a "FAIL" status
 * @param jobParams the request parameters, passed through to the formatting step
 * @return the formatted matches, or a status representation ("FAIL" or "TEST")
 */
public Representation process(TextInput input, RequestParameters jobParams) {
    boolean hasText = input != null && input.buffer != null;
    if (!hasText) {
        return status("FAIL", "No text");
    }

    debug("Processing plain text doc");
    ++requestCount;

    // Outside production mode nothing is extracted; just acknowledge the document.
    if (!prodMode) {
        return status("TEST", "nothing done in test with doc=" + input.id);
    }

    try {
        PlaceGeocoder geocoder = (PlaceGeocoder) getExtractor();
        List<TextMatch> found = geocoder.extract(input);
        // Hand the matches to the output formatter (JSON, per the original note).
        return format(found, jobParams);
    } catch (Exception failure) {
        // Any failure during extraction or formatting is logged and reported as a status.
        error("Failure on doc " + input.id, failure);
        return status("FAIL", failure.getMessage() + "; requests=" + requestCount);
    }
}
use of org.opensextant.extractors.geo.PlaceGeocoder in project Xponents by OpenSextant.
the class XlayerRestlet method configure.
/**
 * Creates and configures the place tagger, then attaches the optional user exclusion
 * filter when that resource can be resolved via {@code getClass().getResource}.
 *
 * @throws ConfigException if the user filter resource is found but loading it raises an
 *         {@link IOException}; may also propagate from the tagger's own configuration
 */
public void configure() throws ConfigException {
    // Default: process place/country mentions in document texts.
    // Parameters are not applied here (tagger.setParameters); see the Xponents
    // concept of Parameters if that becomes necessary.
    tagger = new PlaceGeocoder();
    tagger.enablePersonNameMatching(true);
    tagger.configure();

    // TODO: refine this filter list. Use "/filters/non-placenames,user.csv" going forward.
    final String userFilterPath = "/filters/non-placenames,user.csv";
    final URL filterResource = getClass().getResource(userFilterPath);

    // The user filter is optional: its absence is only logged.
    if (filterResource == null) {
        info("Optional user filter not found. User exclusion list is file=" + userFilterPath);
        return;
    }

    try {
        tagger.setMatchFilter(new MatchFilter(filterResource));
    } catch (IOException err) {
        throw new ConfigException("Setup error with geonames utility or other configuration", err);
    }
}
use of org.opensextant.extractors.geo.PlaceGeocoder in project Xponents by OpenSextant.
the class GeoTaggerMapper method setup.
/**
 * Mapper setup: runs the superclass setup, then creates and configures the PlaceGeocoder.
 * Per the original note, XTax or PlaceGeocoder takes in the SOLR path for xponents solr
 * from the JVM environment.
 *
 * @param c the context, passed through to the superclass setup
 * @throws IOException if creating or configuring the geocoder raises a ConfigException;
 *         that exception is kept as the cause
 */
@Override
public void setup(Context c) throws IOException {
    super.setup(c);

    try {
        geocoder = new PlaceGeocoder();
        geocoder.configure();
    } catch (ConfigException configErr) {
        // Report configuration failures through the exception type this method declares.
        throw new IOException("setup.PlaceGeocoder", configErr);
    }

    log.info("DONE");
}
use of org.opensextant.extractors.geo.PlaceGeocoder in project Xponents by OpenSextant.
the class BasicGeoTemporalProcessing method setup.
/**
 * Prepares a processing run: validates the job parameters, registers the extractors,
 * starts the document converter, and creates and starts one formatter per output format.
 *
 * <p>Ideally the one-time initialization steps would be kept apart from the repetitive
 * job setup. Outputs might be set up once for the whole JVM session or periodically.
 * In summary, configure separately:
 * <ul>
 *   <li>extractors and converters</li>
 *   <li>job inputs and parameters</li>
 *   <li>output formatters</li>
 *   <li>other resources, e.g., filters</li>
 * </ul>
 *
 * @param inFile the input file path; stored trimmed on the job parameters
 * @param outFormats names of the output formats to create formatters for; may be null,
 *        in which case no formatter is created
 * @param outFile the output file path; stored trimmed on the job parameters
 * @param tempDir the conversion cache directory; when null, converter saving is disabled
 * @throws ConfigException if the document converter's setup raises an IOException; may
 *         also propagate from extractor configuration
 * @throws ProcessingException if parameter validation fails
 * @throws IOException declared by the signature; the source within this method is not
 *         visible here (presumably formatter creation or start)
 */
public void setup(String inFile, List<String> outFormats, String outFile, String tempDir) throws ConfigException, ProcessingException, IOException {
    params.isdefault = false;

    boolean parametersOk = validateParameters(inFile, outFormats, outFile, tempDir, params);
    if (!parametersOk) {
        throw new ProcessingException("VALIDATION ERRORS: " + runnerMessage.toString());
    }

    // Testing only: tag places and coordinates, but do not output countries.
    // If only coordinates are wanted from text, an XCoord extractor (configure it, then
    // addExtractor) would be enough; per the original note the geocoder below does
    // both coordinates and names.
    params.tag_places = true;
    params.tag_coordinates = true;
    params.output_countries = false;

    PlaceGeocoder placeTagger = new PlaceGeocoder();
    placeTagger.enablePersonNameMatching(true);
    placeTagger.setParameters(params);
    placeTagger.configure();
    this.addExtractor(placeTagger);

    XTemporal dateTagger = new XTemporal();
    dateTagger.configure();
    this.addExtractor(dateTagger);

    converter = new XText();
    converter.enableHTMLScrubber(false);
    converter.enableSaving(true);
    converter.enableOverwrite(false);
    converter.setConversionListener(this);

    // Without a temp directory there is no conversion cache, so saving is switched off again.
    if (tempDir == null) {
        converter.enableSaving(false);
    } else {
        converter.getPathManager().setConversionCache(tempDir);
    }

    try {
        converter.setup();
    } catch (IOException ioerr) {
        throw new ConfigException("Document converter could not start", ioerr);
    }

    this.params.inputFile = inFile.trim();
    this.params.outputFile = outFile.trim();

    if (outFormats == null) {
        return;
    }

    // One formatter per requested format; each is registered and then started for this job.
    // (Adding extra fields such as file path or match text to a CSV formatter was left
    // disabled in the original.)
    for (String formatName : outFormats) {
        params.addOutputFormat(formatName);
        AbstractFormatter output = createFormatter(formatName, params);
        output.overwrite = overwriteOutput;
        this.addFormatter(output);
        output.start(params.getJobName());
    }
}
Aggregations