use of org.wikivoyage.listings.entity.Listing in project wikivoyage-listings by baturin.
the class ValidationReport method write.
/**
 * Writes an HTML validation report covering every validation issue of the given listings.
 *
 * <p>One row is built per (listing, issue) pair with {@code createRow}. The rows and the dump
 * date are substituted for the {@code {rows}} and {@code {dumpDate}} placeholders of the
 * {@code /validation-report-template.htm} classpath resource, and the result is written to
 * {@code outputFilename} encoded as UTF-8 (the same encoding the template is read with).
 *
 * @param pois listings whose validation issues are reported
 * @param outputFilename path of the report file to write
 * @param dumpDate dump date inserted literally into the report
 * @throws WriteOutputException if the template cannot be read or the report cannot be written;
 *         the underlying {@link IOException} is attached as the cause
 */
@Override
public void write(Iterable<Listing> pois, String outputFilename, String dumpDate) throws WriteOutputException {
    try {
        StringBuilder rows = new StringBuilder();
        for (Listing poi : pois) {
            for (ValidationIssue issue : poi.getValidationIssues()) {
                rows.append(createRow(poi, issue.getDescription(poi), issue.getCategory()));
            }
        }

        String template;
        try (java.io.InputStream templateStream =
                this.getClass().getResourceAsStream("/validation-report-template.htm")) {
            // getResourceAsStream returns null when the resource is missing; report that as an
            // I/O failure instead of letting a NullPointerException escape.
            if (templateStream == null) {
                throw new IOException("Template resource not found: /validation-report-template.htm");
            }
            template = IOUtils.toString(templateStream, "UTF-8");
        }

        // Replace variables in template. String.replace substitutes literally, so '$' or '\'
        // inside dumpDate cannot be misread as a regex group reference or escape.
        String report = template
                .replace("{rows}", rows.toString())
                .replace("{dumpDate}", dumpDate);

        // Write with an explicit charset: FileWriter would use the platform default, which may
        // not match the UTF-8 template.
        try (BufferedWriter writer = new BufferedWriter(
                new java.io.OutputStreamWriter(new java.io.FileOutputStream(outputFilename), "UTF-8"))) {
            writer.write(report);
        }
    } catch (IOException e) {
        // Keep the original failure reachable for diagnostics.
        WriteOutputException failure = new WriteOutputException();
        failure.initCause(e);
        throw failure;
    }
}
use of org.wikivoyage.listings.entity.Listing in project wikivoyage-listings by baturin.
the class Main method processDump.
/**
 * Produces every requested output format for one language and dump date.
 *
 * <p>Returns immediately when the listing file of every format already exists. Otherwise the
 * dump is downloaded unless its cache file is present, the listings are read from it,
 * optionally routed through a serialized intermediate file, validated, and handed to
 * {@code writeFormat} once per format.
 *
 * @param downloader provides the dump URL and performs the download
 * @param language language code of the dump
 * @param latestDumpDate passed through to {@code writeFormat}
 * @param dumpDate date of the dump to process
 * @param formats output formats to generate; only the map values are used
 * @param useIntermediateFile when {@code true}, listings are first written to
 *        {@code serialized-pois.bin} in the working directory and re-read from there
 */
private static void processDump(DumpDownloader downloader, String language, String latestDumpDate, String dumpDate, HashMap<String, OutputFormat> formats, boolean useIntermediateFile) throws IOException, FileUtilsException, InterruptedException, WriteOutputException {
    // Stop at the first format whose output file has not been generated yet.
    boolean outputMissing = false;
    for (OutputFormat candidate : formats.values()) {
        String outputPath = fileNames.getListingPath(language, dumpDate, candidate.getDefaultExtension(), true);
        if (!FileUtils.fileExists(outputPath)) {
            outputMissing = true;
            break;
        }
    }
    if (!outputMissing) {
        log.info("All files already exist for '" + language + "-" + dumpDate + "'");
        return;
    }

    log.info("Create POIs for '" + dumpDate + "'");

    // Fetch the dump only when no cached copy is present.
    String sourceUrl = downloader.dumpUrl(language, dumpDate);
    String cachedDump = fileNames.dumpCacheFilename(language, dumpDate);
    boolean dumpCached = FileUtils.fileExists(cachedDump);
    if (!dumpCached) {
        downloader.downloadDumpFromUrl(sourceUrl, cachedDump);
    }

    // Listings as read from the dump file.
    Iterable<Listing> parsed = new ListingsIterable(cachedDump);

    // Optionally write the parsed listings to an intermediate file and read them back from it.
    Iterable<Listing> source;
    if (useIntermediateFile) {
        log.info("Write intermediate file with parsed listings");
        String serializedPath = fileNames.workingDirPath("serialized-pois.bin");
        FileUtils.removeFile(serializedPath);
        new JavaSerializedObject().write(parsed, serializedPath, dumpDate);
        source = new JavaSerializedIterable(serializedPath);
    } else {
        source = parsed;
    }

    Iterable<Listing> validated = validate(source);

    // Emit the validated listings in each requested format.
    for (OutputFormat target : formats.values()) {
        writeFormat(validated, language, dumpDate, latestDumpDate, target);
    }
}
use of org.wikivoyage.listings.entity.Listing in project wikivoyage-listings by baturin.
the class ArticleParser method parsePage.
/**
 * Parse single Wikivoyage page, look for listings, put them into list of POIs.
 *
 * <p>Parsing is best-effort: any exception raised while preprocessing the text or processing
 * the resulting node tree is logged, and the list is returned with whatever had been added
 * to it before the failure.
 *
 * @param article Name of Wikivoyage article
 * @param text Wikivoyage page as string
 * @return list of listings collected from the page; never {@code null}, possibly empty
 */
public List<Listing> parsePage(String article, String text) {
    log.debug("Start: parse article '" + article + "'");
    LinkedList<Listing> pois = new LinkedList<>();
    try {
        ParserConfig config = new SimpleParserConfig();
        WikitextPreprocessor p = new WikitextPreprocessor(config);
        WtNode node = p.parseArticle(text, "");
        processNode(article, node, pois);
    } catch (Exception e) {
        // Report through the class logger (with the article name and stack trace) rather than
        // raw stderr, so failures land in the same place as the other messages of this method.
        log.error("Failure: parse article '" + article + "'", e);
    }
    log.debug("End: parse article '" + article + "'");
    return pois;
}
use of org.wikivoyage.listings.entity.Listing in project wikivoyage-listings by baturin.
the class JavaSerializedIterator method next.
/**
 * Returns the listing currently held in {@code poi} and then calls {@code getNext()}.
 *
 * @return the value {@code poi} had when this method was entered
 */
@Override
public Listing next() {
    // Capture the held listing first, because getNext() is called before returning.
    final Listing pending = poi;
    getNext();
    return pending;
}
use of org.wikivoyage.listings.entity.Listing in project wikivoyage-listings by baturin.
the class CSV method write.
/**
 * Writes the listings to a CSV file: one header line followed by one line per listing.
 *
 * <p>Every field value is passed through {@code foolproof} before being written; fields are
 * joined with {@code SEPARATOR} and lines end with {@code NEW_LINE}. The file is encoded as
 * UTF-8 regardless of the platform default charset, so non-ASCII text in listings is kept
 * intact.
 *
 * @param pois listings to export
 * @param outputFilename path of the CSV file to write
 * @param dumpDate not used by this format
 * @throws WriteOutputException if the file cannot be opened, written or closed; the underlying
 *         {@link IOException} is attached as the cause
 */
public void write(Iterable<Listing> pois, String outputFilename, String dumpDate) throws WriteOutputException {
    // try-with-resources closes the writer on every path; an IOException raised by close()
    // is reported through the same catch block as a write failure.
    try (BufferedWriter writer = new BufferedWriter(
            new java.io.OutputStreamWriter(new java.io.FileOutputStream(outputFilename), "UTF-8"))) {
        // Write the CSV header.
        writer.write("article" + SEPARATOR + "type" + SEPARATOR + "title" + SEPARATOR
                + "alt" + SEPARATOR + "wikidata" + SEPARATOR + "wikipedia" + SEPARATOR
                + "address" + SEPARATOR + "directions" + SEPARATOR + "phone" + SEPARATOR
                + "tollFree" + SEPARATOR + "email" + SEPARATOR + "fax" + SEPARATOR
                + "url" + SEPARATOR + "hours" + SEPARATOR + "checkIn" + SEPARATOR
                + "checkOut" + SEPARATOR + "image" + SEPARATOR + "price" + SEPARATOR
                + "latitude" + SEPARATOR + "longitude" + SEPARATOR + "wifi" + SEPARATOR
                + "accessibility" + SEPARATOR + "lastEdit" + SEPARATOR + "description" + NEW_LINE);
        // Write each POI, fields in the same order as the header.
        for (Listing poi : pois) {
            writer.write(foolproof(poi.getArticle()) + SEPARATOR);
            writer.write(foolproof(poi.getType()) + SEPARATOR);
            writer.write(foolproof(poi.getTitle()) + SEPARATOR);
            writer.write(foolproof(poi.getAlt()) + SEPARATOR);
            writer.write(foolproof(poi.getWikidata()) + SEPARATOR);
            writer.write(foolproof(poi.getWikipedia()) + SEPARATOR);
            writer.write(foolproof(poi.getAddress()) + SEPARATOR);
            writer.write(foolproof(poi.getDirections()) + SEPARATOR);
            writer.write(foolproof(poi.getPhone()) + SEPARATOR);
            writer.write(foolproof(poi.getTollFree()) + SEPARATOR);
            writer.write(foolproof(poi.getEmail()) + SEPARATOR);
            writer.write(foolproof(poi.getFax()) + SEPARATOR);
            writer.write(foolproof(poi.getUrl()) + SEPARATOR);
            writer.write(foolproof(poi.getHours()) + SEPARATOR);
            writer.write(foolproof(poi.getCheckIn()) + SEPARATOR);
            writer.write(foolproof(poi.getCheckOut()) + SEPARATOR);
            writer.write(foolproof(poi.getImage()) + SEPARATOR);
            writer.write(foolproof(poi.getPrice()) + SEPARATOR);
            writer.write(foolproof(poi.getLatitude()) + SEPARATOR);
            writer.write(foolproof(poi.getLongitude()) + SEPARATOR);
            writer.write(foolproof(poi.getWifi()) + SEPARATOR);
            writer.write(foolproof(poi.getAccessibility()) + SEPARATOR);
            writer.write(foolproof(poi.getLastEdit()) + SEPARATOR);
            writer.write(foolproof(poi.getDescription()) + NEW_LINE);
        }
    } catch (IOException e) {
        // Keep the original failure reachable for diagnostics.
        WriteOutputException failure = new WriteOutputException();
        failure.initCause(e);
        throw failure;
    }
}
Aggregations