Use of org.wikidata.query.rdf.common.uri.UrisScheme in project wikidata-query-rdf by wikimedia.
Example from the class EntityMungingRdfHandlerUnitTest, method testEntityBoundary.
@Test
public void testEntityBoundary() throws RDFHandlerException {
// Wire a munging handler over a mocked downstream handler so we can observe
// exactly when buffered statements are flushed to it.
UrisScheme uriScheme = UrisSchemeFactory.WIKIDATA;
Munger munger = Munger.builder(uriScheme).build();
RDFHandler childHandler = mock(RDFHandler.class);
AtomicLong nbEntities = new AtomicLong();
EntityMungingRdfHandler handler = new EntityMungingRdfHandler(uriScheme, munger, childHandler, nbEntities::set);
handler.startRDF();
// Feed the header statements (about/version/dateModified) and sitelinks for Q1.
handler.handleStatement(statement(uriScheme.entityDataHttps() + "Q1", SchemaDotOrg.ABOUT, uriScheme.entityIdToURI("Q1")));
handler.handleStatement(statement(uriScheme.entityDataHttps() + "Q1", SchemaDotOrg.VERSION, 123));
handler.handleStatement(statement(uriScheme.entityDataHttps() + "Q1", SchemaDotOrg.SOFTWARE_VERSION, 2));
handler.handleStatement(statement(uriScheme.entityDataHttps() + "Q1", SchemaDotOrg.DATE_MODIFIED, new LiteralImpl("2019-11-19T15:53:28Z")));
for (Statement statement : siteLink("Q1", "https://en.wikipedia.org/wiki/Thing", "en")) {
handler.handleStatement(statement);
}
// It buffers everything until it reaches another entity, so nothing has been
// counted or forwarded downstream yet (only startRDF was propagated).
assertThat(nbEntities.get()).isEqualTo(0);
verify(childHandler).startRDF();
verify(childHandler, never()).handleStatement(any());
// Send another data:entity statement so that we flush the buffer to the munger
handler.handleStatement(statement(uriScheme.entityDataHttps() + "Q2", SchemaDotOrg.ABOUT, uriScheme.entityIdToURI("Q2")));
// Q1 is now complete: the entity counter callback must have fired exactly once.
assertThat(nbEntities.get()).isEqualTo(1);
}
Use of org.wikidata.query.rdf.common.uri.UrisScheme in project wikidata-query-rdf by wikimedia.
Example from the class AbstractUpdaterIntegrationTestBase, method update.
/**
 * Runs the updater over all item ids in the inclusive range {@code [from, to]}.
 *
 * @param from first item id to update
 * @param to last item id to update (inclusive)
 * @throws RuntimeException wrapping any checked failure raised during the run
 */
@SuppressWarnings("checkstyle:IllegalCatch")
public void update(int from, int to) {
    // Bounded pool: up to 10 threads, created on demand and reclaimed immediately when idle.
    ExecutorService executorService = new ThreadPoolExecutor(0, 10, 0, TimeUnit.SECONDS, new LinkedBlockingQueue<>());
    UrisScheme uris = UrisSchemeFactory.forWikidataHost("www.wikidata.org");
    try (Change.Source<?> source = IdRangeChangeSource.forItems(from, to, 30);
         Updater<?> updater = new Updater<>(source, wikibaseRepository.get(), rdfRepository, munger, executorService, true, 0, uris, false, new MetricRegistry())) {
        updater.run();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        // The pool was created locally, so it must be shut down here;
        // otherwise its worker threads leak across repeated test runs.
        executorService.shutdown();
    }
}
Use of org.wikidata.query.rdf.common.uri.UrisScheme in project wikidata-query-rdf by wikimedia.
Example from the class Munge, method main.
/**
 * Entry point: runs a bulk munge configured from the command line.
 * Exits with status 1 if the munge fails for any reason.
 */
@SuppressWarnings("IllegalCatch")
public static void main(String[] args) {
    MungeOptions options = handleOptions(MungeOptions.class, args);
    UrisScheme uriScheme = OptionsUtils.WikibaseOptions.wikibaseUris(options);
    Munger munger = mungerFromOptions(options);
    // A chunk size below 1 means "no chunking": treat everything as one huge chunk.
    int requestedChunkSize = options.chunkSize();
    int effectiveChunkSize = requestedChunkSize < 1 ? Integer.MAX_VALUE : requestedChunkSize;
    try {
        new Munge(uriScheme, munger, CliUtils.reader(options.from()), effectiveChunkSize, options.to()).run();
    } catch (Exception e) {
        log.error("Fatal error munging RDF", e);
        System.exit(1);
    }
}
Use of org.wikidata.query.rdf.common.uri.UrisScheme in project wikidata-query-rdf by wikimedia.
Example from the class Update, method initialize.
// Builds the fully wired Updater from command-line options.
// Statement order matters: resources are registered with the Closer in the order
// they are created so that a failure part-way through still releases everything
// allocated so far. Any initialization error is logged and rethrown.
private static Updater<? extends Change.Batch> initialize(String[] args, Closer closer) throws URISyntaxException {
try {
UpdateOptions options = handleOptions(UpdateOptions.class, args);
MetricRegistry metricRegistry = createMetricRegistry(closer, options.metricDomain());
StreamDumper wikibaseStreamDumper = createStreamDumper(dumpDirPath(options));
WikibaseRepository wikibaseRepository = new WikibaseRepository(UpdateOptions.uris(options), options.constraints(), metricRegistry, wikibaseStreamDumper, UpdateOptions.revisionDuration(options), RDFParserSuppliers.defaultRdfParser());
closer.register(wikibaseRepository);
UrisScheme wikibaseUris = WikibaseOptions.wikibaseUris(options);
URI root = wikibaseRepository.getUris().builder().build();
URI sparqlUri = UpdateOptions.sparqlUri(options);
// Shared HTTP client for talking to the SPARQL endpoint; wrapped so the
// Closer can stop it on shutdown.
HttpClient httpClient = buildHttpClient(getHttpProxyHost(), getHttpProxyPort());
closer.register(wrapHttpClient(httpClient));
Retryer<ContentResponse> retryer = buildHttpClientRetryer();
Duration rdfClientTimeout = getRdfClientTimeout();
RdfClient rdfClient = new RdfClient(httpClient, sparqlUri, retryer, rdfClientTimeout);
RdfRepository rdfRepository = new RdfRepository(wikibaseUris, rdfClient, MAX_FORM_CONTENT_SIZE);
// The start time comes either from the options or from the last-updated
// marker stored in the RDF repository (depending on options.init()).
Instant startTime = getStartTime(startInstant(options), rdfRepository, options.init());
Change.Source<? extends Change.Batch> changeSource = buildChangeSource(options, startTime, wikibaseRepository, rdfClient, root, metricRegistry);
Munger munger = mungerFromOptions(options);
ExecutorService updaterExecutorService = createUpdaterExecutorService(options.threadCount());
Updater<? extends Change.Batch> updater = createUpdater(wikibaseRepository, wikibaseUris, rdfRepository, changeSource, munger, updaterExecutorService, options.importAsync(), options.pollDelay(), options.verify(), metricRegistry);
closer.register(updater);
return updater;
} catch (Exception e) {
// Log here (the caller may only see the exception type), then rethrow so
// startup aborts; the Closer releases whatever was registered above.
log.error("Error during initialization.", e);
throw e;
}
}
Use of org.wikidata.query.rdf.common.uri.UrisScheme in project wikidata-query-rdf by wikimedia.
Example from the class WikibaseContextListener, method initializeServices.
/**
 * Initializes BG service setup to allow whitelisted services.
 * Also add additional custom services and functions.
 *
 * Order matters here: custom services are registered and whitelisted before
 * the default (remote) service factory is installed, and each built-in date
 * function is removed before its Wikibase-aware replacement is added.
 */
@VisibleForTesting
public void initializeServices() {
MetricRegistry metricRegistry = createMetricRegistry();
// Enable service whitelisting
final ServiceRegistry reg = ServiceRegistry.getInstance();
reg.setWhitelistEnabled(ENABLE_WHITELIST);
// Register the Wikibase-specific SPARQL services (label, geo, MW API, categories).
LabelService.register();
GeoService.register();
MWApiServiceFactory.register(metricRegistry.timer(name(MWApiServiceCall.class, MW_API_REQUEST)));
CategoriesStoredQuery.register();
// Whitelist services we like by default
reg.addWhitelistURL(GASService.Options.SERVICE_KEY.toString());
reg.addWhitelistURL(ValuesServiceFactory.SERVICE_KEY.toString());
reg.addWhitelistURL(BDS.SEARCH_IN_SEARCH.toString());
reg.addWhitelistURL(SliceServiceFactory.SERVICE_KEY.toString());
reg.addWhitelistURL(SampleServiceFactory.SERVICE_KEY.toString());
loadWhitelist(reg);
// Initialize remote services
reg.setDefaultServiceFactory(getDefaultServiceFactory(metricRegistry.timer(name(RemoteServiceFactoryImpl.class, REMOTE_REQUEST))));
// Override date functions so that we can handle them
// via WikibaseDate
FunctionRegistry.remove(FunctionRegistry.YEAR);
FunctionRegistry.add(FunctionRegistry.YEAR, getWikibaseDateBOpFactory(DateOp.YEAR));
FunctionRegistry.remove(FunctionRegistry.MONTH);
FunctionRegistry.add(FunctionRegistry.MONTH, getWikibaseDateBOpFactory(DateOp.MONTH));
FunctionRegistry.remove(FunctionRegistry.DAY);
FunctionRegistry.add(FunctionRegistry.DAY, getWikibaseDateBOpFactory(DateOp.DAY));
FunctionRegistry.remove(FunctionRegistry.HOURS);
FunctionRegistry.add(FunctionRegistry.HOURS, getWikibaseDateBOpFactory(DateOp.HOURS));
FunctionRegistry.remove(FunctionRegistry.MINUTES);
FunctionRegistry.add(FunctionRegistry.MINUTES, getWikibaseDateBOpFactory(DateOp.MINUTES));
FunctionRegistry.remove(FunctionRegistry.SECONDS);
FunctionRegistry.add(FunctionRegistry.SECONDS, getWikibaseDateBOpFactory(DateOp.SECONDS));
FunctionRegistry.remove(FunctionRegistry.NOW);
// NOW() takes no arguments; reject any that are supplied.
FunctionRegistry.add(FunctionRegistry.NOW, (context, globals, scalarValues, args) -> {
if (args != null && args.length > 0)
throw new IllegalArgumentException("no args for NOW()");
return new WikibaseNowBOp(globals);
});
// Geospatial distance function
FunctionRegistry.add(new URIImpl(GeoSparql.FUNCTION_NAMESPACE + "distance"), getDistanceBOPFactory());
// Geospatial functions
FunctionRegistry.add(new URIImpl(GeoSparql.NORTH_EAST_FUNCTION), getCornersBOPFactory(WikibaseCornerBOp.Corners.NE));
FunctionRegistry.add(new URIImpl(GeoSparql.SOUTH_WEST_FUNCTION), getCornersBOPFactory(WikibaseCornerBOp.Corners.SW));
FunctionRegistry.add(new URIImpl(GeoSparql.GLOBE_FUNCTION), getCoordinatePartBOpFactory(CoordinatePartBOp.Parts.GLOBE));
FunctionRegistry.add(new URIImpl(GeoSparql.LON_FUNCTION), getCoordinatePartBOpFactory(CoordinatePartBOp.Parts.LON));
FunctionRegistry.add(new URIImpl(GeoSparql.LAT_FUNCTION), getCoordinatePartBOpFactory(CoordinatePartBOp.Parts.LAT));
// wikibase:decodeUri
FunctionRegistry.add(new URIImpl(Ontology.NAMESPACE + "decodeUri"), getDecodeUriBOpFactory());
// The "somevalue" handling mode is configurable via a system property;
// defaults to blank-node mode.
IsSomeValueFunctionFactory.SomeValueMode mode = IsSomeValueFunctionFactory.SomeValueMode.lookup(System.getProperty("wikibaseSomeValueMode", "blank"));
UrisScheme uris = UrisSchemeFactory.getURISystem();
registerIsSomeValueFunction(FunctionRegistry::add, mode, uris.wellKnownBNodeIRIPrefix());
addPrefixes(uris);
log.info("Wikibase services initialized.");
}
Aggregations