Search in sources:

Example 1 with DataExtensionConfig

use of com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig in project OpenRefine by OpenRefine.

the class PreviewExtendDataCommand method doPost.

/**
 * Responds with a preview of a data extension for a selection of rows of one column.
 * <p>
 * Request parameters read here:
 * <ul>
 * <li>{@code columnName} - name of the column whose matched cells are extended;</li>
 * <li>{@code rowIndices} - JSON array of row indices to preview;</li>
 * <li>{@code extension} - JSON passed to {@link DataExtensionConfig#reconstruct}.</li>
 * </ul>
 * The endpoint queried is the {@code service} of the column's {@link StandardReconConfig}. On success a
 * {@code PreviewResponse} built from the job's columns and the assembled rows is sent back; recoverable
 * input problems are reported as {@code { "code" : "error", ... }} payloads, and any exception raised along
 * the way is forwarded to {@code respondException}.
 *
 * @param request  the HTTP request carrying the parameters above
 * @param response the HTTP response the preview or the error is written to
 * @throws ServletException declared by the overridden method
 * @throws IOException      declared by the overridden method
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!hasValidCSRFToken(request)) {
        respondCSRFError(response);
        return;
    }
    try {
        Project project = getProject(request);
        String columnName = request.getParameter("columnName");
        String rowIndicesString = request.getParameter("rowIndices");
        if (rowIndicesString == null) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
            return;
        }
        String jsonString = request.getParameter("extension");
        DataExtensionConfig config = DataExtensionConfig.reconstruct(jsonString);
        List<Integer> rowIndices = ParsingUtilities.mapper.readValue(rowIndicesString, new TypeReference<List<Integer>>() {
        });
        if (rowIndices == null) {
            // A literal JSON "null" payload deserializes to a null list; treat it like a missing parameter.
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
            return;
        }
        Column column = project.columnModel.getColumnByName(columnName);
        if (column == null) {
            // Without this guard a failed lookup would only surface as a NullPointerException below.
            // The column name is deliberately not echoed: the payload is hand-built JSON and the name
            // would need escaping.
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }");
            return;
        }
        int cellIndex = column.getCellIndex();
        // get the endpoint to extract data from (instanceof is already false for null)
        ReconConfig cfg = column.getReconConfig();
        if (!(cfg instanceof StandardReconConfig)) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
            return;
        }
        String endpoint = ((StandardReconConfig) cfg).service;

        // topicNames/topicIds hold one entry per previewed row (null when the cell has no match), whereas
        // ids only holds the distinct identifiers that are actually sent to the extension job.
        List<String> topicNames = new ArrayList<>();
        List<String> topicIds = new ArrayList<>();
        Set<String> ids = new HashSet<>();
        for (Integer requestedIndex : rowIndices) {
            // Null and out-of-range indices are skipped silently.
            if (requestedIndex == null || requestedIndex < 0 || requestedIndex >= project.rows.size()) {
                continue;
            }
            int rowIndex = requestedIndex;
            Row row = project.rows.get(rowIndex);
            Cell cell = row.getCell(cellIndex);
            if (cell != null && cell.recon != null && cell.recon.match != null) {
                topicNames.add(cell.recon.match.name);
                topicIds.add(cell.recon.match.id);
                ids.add(cell.recon.match.id);
            } else {
                // Keep a placeholder row, but do not put a null identifier in the set being queried.
                topicNames.add(null);
                topicIds.add(null);
            }
        }

        Map<String, ReconCandidate> reconCandidateMap = new HashMap<>();
        ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(config, endpoint);
        Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);

        List<List<Object>> rows = new ArrayList<>();
        for (int r = 0; r < topicNames.size(); r++) {
            String id = topicIds.get(r);
            String topicName = topicNames.get(r);
            DataExtension ext = (id != null) ? map.get(id) : null;
            if (ext != null && ext.data.length > 0) {
                // One output row per fetched data row; only the first one repeats the topic name.
                boolean first = true;
                for (Object[] row : ext.data) {
                    List<Object> jsonRow = new ArrayList<>();
                    jsonRow.add(first ? topicName : null);
                    first = false;
                    for (Object cell : row) {
                        jsonRow.add(cell);
                    }
                    rows.add(jsonRow);
                }
                continue;
            }
            // No data came back for this row: emit a single-cell row describing the topic instead.
            List<Object> supplement = new ArrayList<>();
            if (id != null) {
                supplement.add(new ReconCandidate(id, topicName, new String[0], 100));
            } else {
                supplement.add("<not reconciled>");
            }
            rows.add(supplement);
        }
        respondJSON(response, new PreviewResponse(job.columns, rows));
    } catch (Exception e) {
        respondException(response, e);
    }
}
Also used : ReconciledDataExtensionJob(com.google.refine.model.recon.ReconciledDataExtensionJob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataExtension(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension) Column(com.google.refine.model.Column) ReconConfig(com.google.refine.model.recon.ReconConfig) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ArrayList(java.util.ArrayList) List(java.util.List) Cell(com.google.refine.model.Cell) ReconCandidate(com.google.refine.model.ReconCandidate) HashSet(java.util.HashSet) DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) ServletException(javax.servlet.ServletException) IOException(java.io.IOException) Project(com.google.refine.model.Project) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Row(com.google.refine.model.Row)

Example 2 with DataExtensionConfig

use of com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig in project OpenRefine by OpenRefine.

the class ExtendDataOperationTests method testFetchCurrent.

/**
 * Test fetch only the best statements
 * <p>
 * Runs an {@code ExtendDataOperation} on the "country" column with the extension setting
 * {@code "rank":"best"} against a local {@link MockWebServer}, then checks the values written to column 1
 * and the number of matched topics of the new "currency" column.
 */
@Test
public void testFetchCurrent() throws Exception {
    DataExtensionConfig extension = DataExtensionConfig.reconstruct(
            "{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}");

    // NOTE(review): mockHttpCall is defined outside this snippet; presumably it pairs the expected query
    // (first argument) with the response to serve (second argument) - confirm against its definition.
    mockHttpCall(
            "{\"ids\":[\"Q863\",\"Q794\",\"Q17\",\"Q30\"],\"properties\":[{\"id\":\"P38\",\"settings\":{\"rank\":\"best\"}}]}",
            "{\"rows\":{"
                    + "   \"Q794\": {\"P38\": [{\"name\": \"Iranian rial\", \"id\": \"Q188608\"}]},"
                    + "   \"Q863\": {\"P38\": [{\"name\": \"Tajikistani somoni\", \"id\": \"Q199886\"}]},"
                    + "   \"Q30\": {\"P38\": [{\"name\": \"United States dollar\", \"id\": \"Q4917\"}]},"
                    + "   \"Q17\": {\"P38\": [{\"name\": \"Japanese yen\", \"id\": \"Q8146\"}]}"
                    + "}, \"meta\": ["
                    + "     {\"settings\": {\"rank\": \"best\"}, \"name\": \"currency\", \"id\": \"P38\"}"
                    + "]}");

    try (MockWebServer server = new MockWebServer()) {
        server.start();
        server.setDispatcher(dispatcher);
        EngineDependentOperation op = new ExtendDataOperation(engine_config, "country", server.url("/reconcile").url().toString(), RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE, extension, 1);
        LongRunningProcessStub process = new LongRunningProcessStub(op.createProcess(project, options));
        process.run();

        /*
         * Tajikistan has one "preferred" currency and one "normal" one (in terms of statement ranks). But thanks to
         * our setting in the extension configuration, we only fetch the current one, so the one just after it is
         * the one for the US (USD).
         */
        // assertEquals (TestNG order: actual, expected) reports the offending value on failure,
        // which assertTrue(a.equals(b)) does not.
        Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni");
        Assert.assertEquals(project.rows.get(3).getCellValue(1), "United States dollar");
        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 4);
    }
}
Also used : LongRunningProcessStub(com.google.refine.process.LongRunningProcessStub) EngineDependentOperation(com.google.refine.operations.EngineDependentOperation) MockWebServer(okhttp3.mockwebserver.MockWebServer) DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) RefineTest(com.google.refine.RefineTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 3 with DataExtensionConfig

use of com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig in project OpenRefine by OpenRefine.

the class ExtendDataOperationTests method testFetchRecord.

/**
 * Test fetch records (multiple values per reconciled cell)
 * <p>
 * Runs an {@code ExtendDataOperation} on the "country" column with the extension setting
 * {@code "rank":"any"} against a local {@link MockWebServer}. The served response carries two currencies for
 * Q863, and the assertions check that the second one lands on its own row with a blank cell to its left.
 */
@Test
public void testFetchRecord() throws Exception {
    DataExtensionConfig extension = DataExtensionConfig.reconstruct(
            "{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"any\"}}]}");

    // NOTE(review): mockHttpCall is defined outside this snippet; presumably it pairs the expected query
    // (first argument) with the response to serve (second argument) - confirm against its definition.
    mockHttpCall(
            "{\"ids\":[\"Q863\",\"Q794\",\"Q17\",\"Q30\"],\"properties\":[{\"id\":\"P38\",\"settings\":{\"rank\":\"any\"}}]}",
            "{\"rows\": {"
                    + "   \"Q794\": {\"P38\": [{\"name\": \"Iranian rial\", \"id\": \"Q188608\"}]},"
                    + "   \"Q863\": {\"P38\": [{\"name\": \"Tajikistani somoni\", \"id\": \"Q199886\"}, {\"name\": \"Tajikistani ruble\", \"id\": \"Q2423956\"}]},"
                    + "   \"Q30\": {\"P38\": [{\"name\": \"United States dollar\", \"id\": \"Q4917\"}]},"
                    + "   \"Q17\": {\"P38\": [{\"name\": \"Japanese yen\", \"id\": \"Q8146\"}]}"
                    + "},"
                    + "\"meta\": ["
                    + "    {\"settings\": {\"rank\": \"any\"}, \"name\": \"currency\", \"id\": \"P38\"}"
                    + "]}");

    try (MockWebServer server = new MockWebServer()) {
        server.start();
        server.setDispatcher(dispatcher);
        EngineDependentOperation op = new ExtendDataOperation(engine_config, "country", server.url("/reconcile").url().toString(), RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE, extension, 1);
        LongRunningProcessStub process = new LongRunningProcessStub(op.createProcess(project, options));
        process.run();

        /*
         * Tajikistan has one "preferred" currency and one "normal" one (in terms of statement ranks). The second
         * currency is fetched as well, which creates a record (the cell to the left of it is left blank).
         */
        // assertEquals/assertNull (TestNG order: actual, expected, message) report the offending value on
        // failure, which assertTrue(a.equals(b)) does not.
        Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni", "Bad currency name for Tajikistan");
        Assert.assertEquals(project.rows.get(3).getCellValue(1), "Tajikistani ruble", "Bad currency name for Tajikistan");
        Assert.assertNull(project.rows.get(3).getCellValue(0));
        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 5);
    }
}
Also used : LongRunningProcessStub(com.google.refine.process.LongRunningProcessStub) EngineDependentOperation(com.google.refine.operations.EngineDependentOperation) MockWebServer(okhttp3.mockwebserver.MockWebServer) DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) RefineTest(com.google.refine.RefineTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 4 with DataExtensionConfig

use of com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig in project OpenRefine by OpenRefine.

the class ExtendDataCommand method createOperation.

/**
 * Assembles an {@link ExtendDataOperation} from the parameters of the given request.
 * <p>
 * Parameters read: {@code baseColumnName}, {@code columnInsertIndex} (parsed as an integer),
 * {@code endpoint}, {@code identifierSpace}, {@code schemaSpace} and {@code extension} (JSON handed to
 * {@link DataExtensionConfig#reconstruct}).
 *
 * @param project      not used by this method
 * @param request      the request supplying the parameters listed above
 * @param engineConfig passed through to the operation unchanged
 * @return the configured operation
 * @throws Exception whatever integer parsing or the reconstruction of the extension configuration raises
 */
@Override
protected AbstractOperation createOperation(Project project, HttpServletRequest request, EngineConfig engineConfig) throws Exception {
    final String baseColumn = request.getParameter("baseColumnName");
    // Parsed before the extension JSON is touched, so a malformed index is reported first.
    final int insertAt = Integer.parseInt(request.getParameter("columnInsertIndex"));
    final String serviceUrl = request.getParameter("endpoint");
    final String idSpace = request.getParameter("identifierSpace");
    final String schema = request.getParameter("schemaSpace");

    return new ExtendDataOperation(
            engineConfig,
            baseColumn,
            serviceUrl,
            idSpace,
            schema,
            DataExtensionConfig.reconstruct(request.getParameter("extension")),
            insertAt);
}
Also used : DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) ExtendDataOperation(com.google.refine.operations.recon.ExtendDataOperation)

Example 5 with DataExtensionConfig

use of com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig in project OpenRefine by OpenRefine.

the class ExtendDataOperationTests method testFormulateQuery.

/**
 * Checks that the query formulated by the job stub for the single id "Q2" is, compared as JSON, the
 * expected document listing the properties P571, P159 and P625.
 */
@Test
public void testFormulateQuery() throws IOException {
    final String expectedQuery = "{\"ids\":[\"Q2\"],\"properties\":[{\"id\":\"P571\"},{\"id\":\"P159\"},{\"id\":\"P625\"}]}";

    final DataExtensionConfig extensionConfig = DataExtensionConfig.reconstruct(dataExtensionConfigJson);
    final Set<String> queriedIds = Collections.singleton("Q2");
    final ReconciledDataExtensionJobStub jobStub = new ReconciledDataExtensionJobStub(extensionConfig, "http://endpoint");

    final String actualQuery = jobStub.formulateQueryStub(queriedIds, extensionConfig);
    TestUtils.assertEqualsAsJson(actualQuery, expectedQuery);
}
Also used : DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) RefineTest(com.google.refine.RefineTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Aggregations

DataExtensionConfig (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) 7, RefineTest (com.google.refine.RefineTest) 5, BeforeTest (org.testng.annotations.BeforeTest) 5, Test (org.testng.annotations.Test) 5, EngineDependentOperation (com.google.refine.operations.EngineDependentOperation) 4, LongRunningProcessStub (com.google.refine.process.LongRunningProcessStub) 4, MockWebServer (okhttp3.mockwebserver.MockWebServer) 4, Cell (com.google.refine.model.Cell) 1, Column (com.google.refine.model.Column) 1, Project (com.google.refine.model.Project) 1, ReconCandidate (com.google.refine.model.ReconCandidate) 1, Row (com.google.refine.model.Row) 1, ReconConfig (com.google.refine.model.recon.ReconConfig) 1, ReconciledDataExtensionJob (com.google.refine.model.recon.ReconciledDataExtensionJob) 1, DataExtension (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension) 1, StandardReconConfig (com.google.refine.model.recon.StandardReconConfig) 1, ExtendDataOperation (com.google.refine.operations.recon.ExtendDataOperation) 1, IOException (java.io.IOException) 1, ArrayList (java.util.ArrayList) 1, HashMap (java.util.HashMap) 1