use of org.apache.commons.io.LineIterator in project sling by apache.
the class SimpleDistributionQueueProvider method enableQueueProcessing.
/**
 * Enables processing of the given queues.
 * <p>
 * When checkpointing is enabled, items are first restored from any
 * {@code <queueName>-checkpoint} file found in the checkpoint directory, then a
 * periodic checkpoint job is scheduled per queue. Finally a processing job is
 * scheduled for every queue.
 * <p>
 * Each checkpoint line is expected to have the form {@code <id> <json-info>};
 * malformed lines are skipped with a warning.
 *
 * @param queueProcessor the processor handed to each scheduled queue processing job
 * @param queueNames     the names of the queues to enable
 */
public void enableQueueProcessing(@Nonnull DistributionQueueProcessor queueProcessor, String... queueNames) {
    if (checkpoint) {
        // recover from checkpoints
        log.debug("recovering from checkpoints if needed");
        for (final String queueName : queueNames) {
            log.debug("recovering for queue {}", queueName);
            DistributionQueue queue = getQueue(queueName);
            FilenameFilter filenameFilter = new FilenameFilter() {
                @Override
                public boolean accept(File file, String name) {
                    return name.equals(queueName + "-checkpoint");
                }
            };
            File[] checkpointFiles = checkpointDirectory.listFiles(filenameFilter);
            if (checkpointFiles == null) {
                // listFiles yields null if the path is not a directory or cannot be read
                log.debug("no checkpoint files available for queue {}", queueName);
                continue;
            }
            for (File qf : checkpointFiles) {
                log.info("recovering from checkpoint {}", qf);
                FileReader fileReader = null;
                try {
                    fileReader = new FileReader(qf);
                    LineIterator lineIterator = IOUtils.lineIterator(fileReader);
                    while (lineIterator.hasNext()) {
                        String s = lineIterator.nextLine();
                        // limit the split to 2 so that spaces inside the JSON payload are preserved
                        String[] split = s.split(" ", 2);
                        if (split.length < 2) {
                            log.warn("skipping malformed line in checkpoint file {}", qf.getAbsolutePath());
                            continue;
                        }
                        String id = split[0];
                        String infoString = split[1];
                        Map<String, Object> info = new HashMap<String, Object>();
                        JsonReader reader = Json.createReader(new StringReader(infoString));
                        JsonObject jsonObject = reader.readObject();
                        for (Map.Entry<String, JsonValue> entry : jsonObject.entrySet()) {
                            if (entry.getValue().getValueType().equals(JsonValue.ValueType.ARRAY)) {
                                // arrays are restored as String[]
                                JsonArray value = jsonObject.getJsonArray(entry.getKey());
                                String[] a = new String[value.size()];
                                for (int i = 0; i < a.length; i++) {
                                    a[i] = value.getString(i);
                                }
                                info.put(entry.getKey(), a);
                            } else if (JsonValue.NULL.equals(entry.getValue())) {
                                info.put(entry.getKey(), null);
                            } else {
                                info.put(entry.getKey(), ((JsonString) entry.getValue()).getString());
                            }
                        }
                        queue.add(new DistributionQueueItem(id, info));
                    }
                    log.info("recovered {} items from queue {}", queue.getStatus().getItemsCount(), queueName);
                } catch (FileNotFoundException e) {
                    log.warn("could not read checkpoint file {}", qf.getAbsolutePath(), e);
                } catch (JsonException e) {
                    log.warn("could not parse info from checkpoint file {}", qf.getAbsolutePath(), e);
                } finally {
                    // always release the file handle, also when parsing fails half way
                    IOUtils.closeQuietly(fileReader);
                }
            }
        }
        // enable checkpointing
        for (String queueName : queueNames) {
            ScheduleOptions options = scheduler.NOW(-1, 15).canRunConcurrently(false).name(getJobName(queueName + "-checkpoint"));
            scheduler.schedule(new SimpleDistributionQueueCheckpoint(getQueue(queueName), checkpointDirectory), options);
        }
    }
    // enable processing
    for (String queueName : queueNames) {
        ScheduleOptions options = scheduler.NOW(-1, 1).canRunConcurrently(false).name(getJobName(queueName));
        scheduler.schedule(new SimpleDistributionQueueProcessor(getQueue(queueName), queueProcessor), options);
    }
}
use of org.apache.commons.io.LineIterator in project stanbol by apache.
the class TikaEngineTest method assertContentRegexp.
/**
 * Asserts that every parsed regex pattern matches at least one line of the
 * content of the parsed blob.
 * <p>
 * The blob content is decoded with the charset named by its {@code charset}
 * parameter, or with the platform default charset if that parameter is absent.
 * The stream is re-opened for each pattern and closed again once the pattern
 * has been checked.
 *
 * @param blob   the blob providing the content to test
 * @param regexp the patterns; each one is wrapped in {@code .*} on both sides
 * @throws IOException on any error while reading the content for the failure message
 */
public void assertContentRegexp(Blob blob, String... regexp) throws IOException {
    final Charset charset;
    if (blob.getParameter().containsKey("charset")) {
        charset = Charset.forName(blob.getParameter().get("charset"));
    } else {
        charset = Charset.defaultCharset();
    }
    for (String expr : regexp) {
        final Pattern p = Pattern.compile(".*" + expr + ".*");
        boolean matched = false;
        final Reader reader = new InputStreamReader(blob.getStream(), charset);
        try {
            final LineIterator it = new LineIterator(reader);
            // stop reading as soon as the first matching line is found
            while (!matched && it.hasNext()) {
                matched = p.matcher(it.nextLine()).matches();
            }
        } finally {
            // release the reader for every pattern, including the last one and failures
            closeQuietly(reader);
        }
        if (!matched) {
            fail(this + ": no match for regexp '" + expr + "', content=\n" + IOUtils.toString(blob.getStream(), charset.toString()));
        }
    }
}
use of org.apache.commons.io.LineIterator in project stanbol by apache.
the class BenchmarkServlet method getExampleBenchmarkPaths.
/**
 * Builds the list of example benchmark paths, one for each line of the
 * {@code /LIST.txt} benchmark text.
 *
 * @param request the request passed on to {@code getExampleBenchmarkPath}
 * @return the paths in the order the lines appear in the list
 * @throws IOException declared for errors raised by the helpers this method calls
 */
private List<String> getExampleBenchmarkPaths(HttpServletRequest request) throws IOException {
    // TODO how to enumerate bundle resources?
    final String listing = getBenchmarkText("/LIST.txt");
    final List<String> paths = new LinkedList<String>();
    for (final LineIterator lines = new LineIterator(new StringReader(listing)); lines.hasNext();) {
        final String entry = lines.nextLine();
        paths.add(getExampleBenchmarkPath(request, entry));
    }
    return paths;
}
use of org.apache.commons.io.LineIterator in project stanbol by apache.
the class GeonamesIndexingSource method entityDataIterator.
/**
 * Creates the iterator over all geonames entries of the configured resources.
 * <p>
 * This source can be iterated only once; a second call throws an
 * {@link IllegalStateException}. The returned iterator reads the resources
 * one after the other, closes the stream of a resource before moving on to
 * the next one and removes each resource from the list once it was opened.
 *
 * @return the iterator over the entity data
 * @throws IllegalStateException if the data were already iterated
 */
@Override
public EntityDataIterator entityDataIterator() {
    if (!consumed) {
        consumed = true;
    } else {
        throw new IllegalStateException("This implementation supports only a " + "single Iteration of the data.");
    }
    return new EntityDataIterator() {
        Iterator<RDFTerm> resources = resourceList.iterator();
        RDFTerm r;
        LineIterator it = null;
        private String next;
        private Representation rep;

        /**
         * Reads the next line, switching to the next resource as soon as the
         * current one is exhausted.
         * @return the next line or <code>null</code> if there are no more lines
         */
        private String getNext() {
            while ((it == null || !it.hasNext()) && resources != null && resources.hasNext()) {
                if (r != null) {
                    // release the stream of the resource that was just exhausted
                    IOUtils.closeQuietly(r.is);
                }
                r = resources.next();
                try {
                    it = r.getEntries();
                } catch (IOException e) {
                    log.error("Unable to read RDFTerm '" + r.getName() + "' because of " + e.getMessage(), e);
                    IOUtils.closeQuietly(r.is);
                    it = null;
                }
                resources.remove();
            }
            if (it != null && it.hasNext()) {
                return it.nextLine();
            } else {
                return null;
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public String next() {
            if (next == null) {
                next = getNext();
            }
            if (next == null) {
                throw new NoSuchElementException();
            } else {
                rep = processGeonameEntry(next);
                next = null;
                return rep.getId();
            }
        }

        @Override
        public boolean hasNext() {
            if (next == null) {
                next = getNext();
            }
            return next != null;
        }

        @Override
        public Representation getRepresentation() {
            return rep;
        }

        @Override
        public void close() {
            if (r != null) {
                IOUtils.closeQuietly(r.is);
            }
            next = null;
            it = null;
            resources = null;
        }

        /**
         * Parses the Representation from the current line.<p>
         * NOTE: this does not process alternate labels and also does not
         * lookup entities for parent codes. Those things are done now by
         * own EntityProcessors
         * @param line the line to process
         * @return the representation
         */
        private Representation processGeonameEntry(String line) {
            LineTokenizer t = new LineTokenizer(line);
            //[0] geonames id
            String id = t.next();
            Integer geoNamesId = Integer.parseInt(id);
            //create a new Doc based on the first Element (geonamesID)
            Representation doc = valueFactory.createRepresentation(new StringBuilder(GEONAMES_RESOURCE_NS).append(id).append('/').toString());
            //add the Integer id so that we do not need to parse it from the subject URI
            doc.add(GeonamesPropertyEnum.idx_id.toString(), geoNamesId);
            //add the geonames:Feature type
            doc.add(GeonamesPropertyEnum.rdf_type.toString(), getReference(GeonamesPropertyEnum.gn_Feature.toString()));
            //[1] UTF-8 name
            String utf8Label = t.next();
            //[2] ASCII Name as rdfs:label
            String askiiLabel = t.next();
            if (utf8Label == null) {
                //use ASCII label as fallback for the utf8 version
                utf8Label = askiiLabel;
            }
            doc.addNaturalText(GeonamesPropertyEnum.gn_name.toString(), utf8Label);
            //[3] Alternate Names
            //alternate names are added later during processing
            t.next();
            //[4] lat
            doc.add(GeonamesPropertyEnum.geo_lat.toString(), new BigDecimal(t.next()));
            //[5] lon
            doc.add(GeonamesPropertyEnum.geo_long.toString(), new BigDecimal(t.next()));
            //[6] featureClass
            String featureClass = new StringBuilder(GEONAMES_ONTOLOGY_NS).append(t.next()).toString();
            doc.add(GeonamesPropertyEnum.gn_featureClass.toString(), getReference(featureClass));
            //[7] featureCode (-> need to use <featureClass>.<featureCode>!!)
            doc.add(GeonamesPropertyEnum.gn_featureCode.toString(), getReference(new StringBuilder(featureClass).append('.').append(t.next()).toString()));
            //countryCode
            // -> geonames uses here the link to an HTML Page showing the Country
            //    We would like to use an Link to a SKOS:Concept representing the Country
            // ... But luckily here we need only to add the URI!
            Set<String> ccs = new HashSet<String>();
            //[8] countryCode
            String countryCode = t.next();
            if (countryCode != null) {
                //need to trim because some country codes use '  ' to indicate null!
                countryCode = countryCode.trim();
                if (countryCode.length() == 2) {
                    //Yes there are some features that are in no country!
                    ccs.add(countryCode);
                }
            }
            //[9] alternate countryCodes
            String altCc = t.next();
            if (altCc != null) {
                StringTokenizer altCcT = new StringTokenizer(altCc, ",");
                while (altCcT.hasMoreElements()) {
                    //use an own variable so that the primary country code,
                    //needed below for the admin codes, is not overwritten
                    String altCountryCode = altCcT.nextToken();
                    if (altCountryCode.length() == 2) {
                        ccs.add(altCountryCode);
                    }
                }
            }
            if (!ccs.isEmpty()) {
                doc.add(GeonamesPropertyEnum.gn_countryCode.toString(), ccs);
            }
            //[10 TO 13] Admin codes
            //first read them -> we need to consume the tokens anyway
            String[] adminCodes = new String[] { //country
            countryCode, //ADM1
            t.next(), //ADM2
            t.next(), //ADM3
            t.next(), //ADM4
            t.next() };
            //Workaround for Admin1 -> add leading '0' for single Value
            if (adminCodes[1] != null && adminCodes[1].length() < 2) {
                adminCodes[1] = '0' + adminCodes[1];
            }
            //now process the admin Codes (including the country at index 0)
            StringBuilder parentCode = new StringBuilder();
            //iterate over parent codes until the first NULL (or '00' unknown) element
            for (int i = 0; i < adminCodes.length && adminCodes[i] != null && !adminCodes[i].equals("00"); i++) {
                if (i > 0) {
                    parentCode.append('.');
                }
                //add the current (last) Element
                parentCode.append(adminCodes[i]);
                String property = i == 0 ? GeonamesPropertyEnum.idx_CC.toString() : new StringBuilder(GeonamesPropertyEnum.idx_ADM.toString()).append(i).toString();
                // add each level
                doc.add(property, parentCode.toString());
            }
            //[14] population
            String populationString = t.next();
            if (populationString != null) {
                //NOTE: we need to use Long, because of Asia (3.800.000.000)
                Long population = Long.valueOf(populationString);
                //compare as long: intValue() would overflow for values > Integer.MAX_VALUE
                if (population.longValue() > 0) {
                    doc.add(GeonamesPropertyEnum.gn_population.toString(), population);
                }
            }
            //[15 TO 16] elevation and gtopo30
            String altString = t.next();
            if (altString == null) {
                //if no elevation than use the gtopo30
                altString = t.next();
            } else {
                //if there is already an elevation, than consume this entry
                t.next();
            }
            //both elevation and gtopo30 may be missing
            if (altString != null) {
                Integer alt = Integer.valueOf(altString);
                if (alt.intValue() > -9999) {
                    //it looks like that -9999 is sometimes used as not known!
                    doc.add(GeonamesPropertyEnum.geo_alt.toString(), alt);
                }
            }
            //[17] time zone
            //not used
            t.next();
            //[18] mod-date
            String modDateString = t.next();
            if (modDateString != null) {
                try {
                    doc.add(GeonamesPropertyEnum.dc_date.toString(), TimeUtils.toDate(DataTypeEnum.DateTime, modDateString));
                } catch (IllegalArgumentException e) {
                    log.warn(String.format("Unable to parse modificationDate for geonamesID %s from value %s", doc.getId(), modDateString));
                }
            }
            return doc;
        }
    };
}
use of org.apache.commons.io.LineIterator in project stanbol by apache.
the class StanbolResourceLoader method getLines.
/**
 * Reads the given resource as UTF-8 text and returns its lines, skipping
 * empty lines and lines that start with {@code '#'}.
 *
 * @param resource the name of the resource passed to {@code openResource}
 * @return the remaining lines in the order they appear in the resource
 * @throws IOException if the resource cannot be opened or closed
 */
public List<String> getLines(String resource) throws IOException {
    List<String> lines = new ArrayList<String>();
    LineIterator it = IOUtils.lineIterator(openResource(resource), "UTF-8");
    try {
        while (it.hasNext()) {
            String line = it.nextLine();
            if (line != null && !line.isEmpty() && line.charAt(0) != '#') {
                lines.add(line);
            }
        }
    } finally {
        // closing the iterator also closes the underlying resource stream
        it.close();
    }
    return lines;
}
Aggregations