Use of com.google.refine.process.ProcessManager in project OpenRefine by OpenRefine.
The class StandardReconConfigTests, method reconNonJsonTest.
/**
 * Runs a reconciliation against a mock service whose first reply is an HTML
 * error page rather than JSON, then checks that the process has stopped and
 * that the reconciled cell still holds its value with no recon attached.
 */
@Test
public void reconNonJsonTest() throws Exception {
    Project project = createCSVProject("title,director\n" + "mulholland drive,david lynch");

    // Body of the first mock response: an HTML page, not JSON.
    String htmlErrorPage = "<!DOCTYPE html>\n"
            + "<html lang=\"en\">\n"
            + " <head>\n"
            + " <meta charset=\"utf-8\">\n"
            + " <title>Error</title>\n"
            + " </head>\n"
            + " <body>\n"
            + " You have reached an error page.\n"
            + " </body>\n"
            + "</html>";

    try (MockWebServer mockService = new MockWebServer()) {
        mockService.start();
        HttpUrl serviceUrl = mockService.url("/openrefine-wikidata/en/api");
        mockService.enqueue(new MockResponse().setBody(htmlErrorPage));
        mockService.enqueue(new MockResponse());

        // Reconciliation settings pointing at the mock service URL.
        String reconConfigJson = " {\n"
                + " \"mode\": \"standard-service\",\n"
                + " \"service\": \"" + serviceUrl + "\",\n"
                + " \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n"
                + " \"schemaSpace\": \"http://www.wikidata.org/prop/direct/\",\n"
                + " \"type\": {\n"
                + " \"id\": \"Q11424\",\n"
                + " \"name\": \"film\"\n"
                + " },\n"
                + " \"autoMatch\": true,\n"
                + " \"columnDetails\": [\n"
                + " {\n"
                + " \"column\": \"director\",\n"
                + " \"propertyName\": \"Director\",\n"
                + " \"propertyID\": \"P57\"\n"
                + " }\n"
                + " ]}";
        StandardReconConfig reconConfig = StandardReconConfig.reconstruct(reconConfigJson);
        ReconOperation operation = new ReconOperation(EngineConfig.reconstruct(null), "director", reconConfig);
        Process reconProcess = operation.createProcess(project, new Properties());

        reconProcess.startPerforming(project.getProcessManager());
        Assert.assertTrue(reconProcess.isRunning());

        // Give the background process a fixed amount of time to finish.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            Assert.fail("Test interrupted");
        }
        Assert.assertFalse(reconProcess.isRunning());

        // The service must have been contacted at least once.
        RecordedRequest firstRequest = mockService.takeRequest();
        assertNotNull(firstRequest);

        // No usable result is expected, but the cell itself must be intact.
        Cell directorCell = project.rows.get(0).cells.get(1);
        assertNotNull(directorCell.value);
        // The recon object stays null so that this case can be told apart from
        // an empty recon object (the service legitimately returned no candidate).
        assertNull(directorCell.recon);
    }
}
Use of com.google.refine.process.ProcessManager in project OpenRefine by OpenRefine.
The class UrlFetchingTests, method testUrlCaching.
/**
 * Test for caching: rows that hold the same source value must end up with the
 * same fetched value, even though the URL being fetched asks for a random
 * integer.
 * <p>
 * The test adds 100 rows ("apple" for the first five, "orange" for the rest),
 * runs a URL-fetching column addition over the "fruits" column, waits a fixed
 * time for the background process to stop, and then compares the fetched
 * values of the first rows.
 *
 * @throws Exception if creating the process fails
 */
@Test
public void testUrlCaching() throws Exception {
    for (int i = 0; i < 100; i++) {
        Row row = new Row(2);
        row.setCell(0, new Cell(i < 5 ? "apple" : "orange", null));
        project.rows.add(row);
    }
    EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config, "fruits", "\"https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new&city=\"+value", OnError.SetToBlank, "rand", 1, 500, true);
    ProcessManager pm = project.getProcessManager();
    Process process = op.createProcess(project, options);
    process.startPerforming(pm);
    Assert.assertTrue(process.isRunning());
    try {
        // We have 100 rows and 500 ms per row but only two distinct
        // values so we should not wait more than ~2000 ms to get the
        // results. Just to make sure the test passes with plenty of
        // net latency we sleep for longer (but still less than
        // 50,000ms).
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        Assert.fail("Test interrupted");
    }
    Assert.assertFalse(process.isRunning());

    // Inspect rows
    String ref_val = (String) project.rows.get(0).getCellValue(1);
    // Just to make sure I picked the right column: the fetched value must not
    // be the source text. Compare by content (null-safe) rather than with
    // '!=', which only compares object references for strings.
    Assert.assertFalse("apple".equals(ref_val));
    for (int i = 1; i < 4; i++) {
        // all random values should be equal due to caching
        Assert.assertEquals(project.rows.get(i).getCellValue(1), ref_val);
    }
}
Use of com.google.refine.process.ProcessManager in project OpenRefine by OpenRefine.
The class StandardReconConfigTests, method reconTest.
/**
 * Runs a reconciliation against a mock service that first answers with a 503
 * and then with a candidate list, and checks both the query sent on the second
 * request and the recon stored on the "director" cell.
 */
@Test
public void reconTest() throws Exception {
    Project project = createCSVProject("title,director\n" + "mulholland drive,david lynch");

    // Payload of the successful response: two candidates for query q0.
    String candidatesPayload = "{\n"
            + "q0: {\n"
            + " result: [\n"
            + " {\n"
            + " P57: {\n"
            + "score: 100,\n"
            + "weighted: 40\n"
            + "},\n"
            + "all_labels: {\n"
            + "score: 59,\n"
            + "weighted: 59\n"
            + "},\n"
            + "score: 70.71428571428572,\n"
            + "id: \"Q3989262\",\n"
            + "name: \"The Short Films of David Lynch\",\n"
            + "type: [\n"
            + "{\n"
            + "id: \"Q24862\",\n"
            + "name: \"short film\"\n"
            + "},\n"
            + "{\n"
            + "id: \"Q202866\",\n"
            + "name: \"animated film\"\n"
            + "}\n"
            + "],\n"
            + "match: false\n"
            + "},\n"
            + "{\n"
            + "P57: {\n"
            + "score: 100,\n"
            + "weighted: 40\n"
            + "},\n"
            + "all_labels: {\n"
            + "score: 44,\n"
            + "weighted: 44\n"
            + "},\n"
            + "score: 60.00000000000001,\n"
            + "id: \"Q83365219\",\n"
            + "name: \"What Did Jack Do?\",\n"
            + "type: [\n"
            + "{\n"
            + "id: \"Q24862\",\n"
            + "name: \"short film\"\n"
            + "}\n"
            + "],\n"
            + "match: false\n"
            + " }\n"
            + " ]\n"
            + " }\n"
            + "}\n";

    try (MockWebServer mockService = new MockWebServer()) {
        mockService.start();
        HttpUrl serviceUrl = mockService.url("/openrefine-wikidata/en/api");
        // First reply: the service is overloaded.
        mockService.enqueue(new MockResponse().setResponseCode(503));
        // Second reply: the service answers successfully.
        mockService.enqueue(new MockResponse().setBody(candidatesPayload));
        mockService.enqueue(new MockResponse());

        // Reconciliation settings pointing at the mock service URL.
        String reconConfigJson = " {\n"
                + " \"mode\": \"standard-service\",\n"
                + " \"service\": \"" + serviceUrl + "\",\n"
                + " \"identifierSpace\": \"http://www.wikidata.org/entity/\",\n"
                + " \"schemaSpace\": \"http://www.wikidata.org/prop/direct/\",\n"
                + " \"type\": {\n"
                + " \"id\": \"Q11424\",\n"
                + " \"name\": \"film\"\n"
                + " },\n"
                + " \"autoMatch\": true,\n"
                + " \"columnDetails\": [\n"
                + " {\n"
                + " \"column\": \"director\",\n"
                + " \"propertyName\": \"Director\",\n"
                + " \"propertyID\": \"P57\"\n"
                + " }\n"
                + " ]}";
        StandardReconConfig reconConfig = StandardReconConfig.reconstruct(reconConfigJson);
        ReconOperation operation = new ReconOperation(EngineConfig.reconstruct(null), "director", reconConfig);
        Process reconProcess = operation.createProcess(project, new Properties());

        reconProcess.startPerforming(project.getProcessManager());
        Assert.assertTrue(reconProcess.isRunning());

        // Give the background process a fixed amount of time to finish.
        try {
            Thread.sleep(1500);
        } catch (InterruptedException e) {
            Assert.fail("Test interrupted");
        }
        Assert.assertFalse(reconProcess.isRunning());

        // Discard the first recorded request, which received the 503 reply.
        mockService.takeRequest();
        RecordedRequest answeredRequest = mockService.takeRequest();
        assertNotNull(answeredRequest);

        // The request body must be the URL-encoded batch query for the cell.
        String sentQuery = answeredRequest.getBody().readUtf8Line();
        assertNotNull(sentQuery);
        String queryJson = "{\"q0\":{\"query\":\"david lynch\",\"type\":\"Q11424\",\"properties\":[{\"pid\":\"P57\",\"v\":\"david lynch\"}],\"type_strict\":\"should\"}}";
        String expectedQuery = "queries=" + URLEncoder.encode(queryJson, "UTF-8");
        assertEquals(sentQuery, expectedQuery);

        // The cell must now carry a recon whose best candidate is the first result.
        Cell directorCell = project.rows.get(0).cells.get(1);
        assertNotNull(directorCell.recon);
        assertEquals(directorCell.recon.service, serviceUrl.toString());
        assertEquals(directorCell.recon.getBestCandidate().types[0], "Q24862");
    }
}
Use of com.google.refine.process.ProcessManager in project OpenRefine by OpenRefine.
The class ColumnAdditionByFetchingURLsOperationTests, method runAndWait.
/**
 * Creates a process for the given operation on the test project and hands it,
 * together with the project's process manager, to the three-argument
 * {@code runAndWait} overload.
 *
 * @param op      the operation to turn into a process
 * @param timeout passed through unchanged to the overload (presumably a
 *                maximum wait; units are not visible here, so confirm against
 *                the overload)
 * @throws Exception if the process cannot be created or the overload fails
 */
private void runAndWait(EngineDependentOperation op, int timeout) throws Exception {
    // Arguments are evaluated left to right: manager lookup first, then process creation.
    runAndWait(project.getProcessManager(), op.createProcess(project, options), timeout);
}
Aggregations