Use of org.wikidata.query.rdf.tool.rdf.EntityMungingRdfHandler in the wikidata-query-rdf project by wikimedia.
Class Munge, method run.
/**
 * Runs the munge pipeline over the input held in {@code from}.
 *
 * <p>The input is parsed, wrapped through a {@link NormalizingRdfHandler} and an
 * {@link EntityMungingRdfHandler}, and forwarded to an {@link RDFChunkWriter}.
 * Both the parser output and the chunk writer are wrapped with
 * {@code AsyncRDFHandler.processAsync}. Whenever the entity count reported to
 * the listener, divided by {@code chunkSize}, differs from the last recorded
 * value, {@code endRDF()} is called on the chunk writer so that it starts a
 * new chunk.
 *
 * <p>{@code from} is closed on every exit path. An {@link IOException} raised
 * while closing it is logged and not rethrown.
 *
 * @throws RDFHandlerException declared by the signature; presumably raised by a handler in the chain
 * @throws IOException declared by the signature; presumably raised while reading the input
 * @throws RDFParseException declared by the signature; presumably raised by the parser
 * @throws InterruptedException declared by the signature; presumably raised while waiting on the async handlers
 */
public void run() throws RDFHandlerException, IOException, RDFParseException, InterruptedException {
    try {
        // Pipeline layout, one line per thread:
        //   thread:main:      parser -> AsyncRDFHandler -> queue
        //   thread:replayer1: Normalizing/Munging -> AsyncRDFHandler -> queue
        //   thread:replayer2: RDFChunkWriter -> RDFWriter -> IO
        AsyncRDFHandler asyncChunkWriter = AsyncRDFHandler.processAsync(
                new RDFChunkWriter(chunkFileFormat),
                false,
                BUFFER_SIZE);

        // Index of the chunk currently being written; held in an AtomicLong so
        // the lambda below can update it.
        AtomicLong lastChunkIndex = new AtomicLong(0);
        EntityMungingRdfHandler.EntityCountListener chunkRollover = (entityCount) -> {
            long chunkIndex = entityCount / chunkSize;
            if (chunkIndex == lastChunkIndex.get()) {
                // Still inside the same chunk: nothing to do.
                return;
            }
            lastChunkIndex.set(chunkIndex);
            // Ending the RDF stream makes RDFChunkWriter start writing a new chunk.
            asyncChunkWriter.endRDF();
        };

        EntityMungingRdfHandler mungingHandler = new EntityMungingRdfHandler(
                uris,
                this.munger,
                asyncChunkWriter,
                chunkRollover);

        RDFParser rdfParser = RDFParserSuppliers.defaultRdfParser().get(
                AsyncRDFHandler.processAsync(
                        new NormalizingRdfHandler(mungingHandler),
                        true,
                        BUFFER_SIZE));
        rdfParser.parse(from, uris.root());

        // Wait for the chunk writer once parsing has returned.
        asyncChunkWriter.waitForCompletion();
    } finally {
        try {
            from.close();
        } catch (IOException closeFailure) {
            log.error("Error closing input", closeFailure);
        }
    }
}
Aggregations