Search in sources :

Example 1 with StandardReconConfig

use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.

Method doPost of the class PreviewExtendDataCommand.

/**
 * Builds a preview of a data extension for a selection of rows of a reconciled column.
 *
 * Reads the request parameters "columnName", "rowIndices" (a JSON list of integers) and
 * "extension" (a JSON data extension configuration), collects the matched entity of each
 * requested row, asks a {@link ReconciledDataExtensionJob} for the extension data and
 * responds with a {@link PreviewResponse}.
 *
 * An error object is sent back when no row indices are given, when the column cannot be
 * found, or when the column's recon config is not a {@link StandardReconConfig}. Any
 * exception raised while processing is forwarded to {@code respondException}.
 *
 * @param request
 *            the HTTP request carrying the parameters described above
 * @param response
 *            the HTTP response the preview or error is written to
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!hasValidCSRFToken(request)) {
        respondCSRFError(response);
        return;
    }
    try {
        Project project = getProject(request);
        String columnName = request.getParameter("columnName");
        String rowIndicesString = request.getParameter("rowIndices");
        if (rowIndicesString == null) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
            return;
        }
        String jsonString = request.getParameter("extension");
        DataExtensionConfig config = DataExtensionConfig.reconstruct(jsonString);
        List<Integer> rowIndices = ParsingUtilities.mapper.readValue(rowIndicesString, new TypeReference<List<Integer>>() {
        });
        Column column = project.columnModel.getColumnByName(columnName);
        if (column == null) {
            // Without this guard an unknown or missing column name would only surface as a
            // NullPointerException on the dereference below.
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }");
            return;
        }
        int cellIndex = column.getCellIndex();
        // get the endpoint to extract data from
        ReconConfig cfg = column.getReconConfig();
        if (!(cfg instanceof StandardReconConfig)) {
            // instanceof is false for null, so this also covers columns without any recon config
            respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
            return;
        }
        String endpoint = ((StandardReconConfig) cfg).service;
        // topicNames and topicIds are parallel lists with one entry per valid requested row;
        // ids is the set of identifiers handed to the extension job.
        List<String> topicNames = new ArrayList<String>();
        List<String> topicIds = new ArrayList<String>();
        Set<String> ids = new HashSet<String>();
        for (int rowIndex : rowIndices) {
            // Row indices outside of the project are skipped
            if (rowIndex >= 0 && rowIndex < project.rows.size()) {
                Row row = project.rows.get(rowIndex);
                Cell cell = row.getCell(cellIndex);
                if (cell != null && cell.recon != null && cell.recon.match != null) {
                    topicNames.add(cell.recon.match.name);
                    topicIds.add(cell.recon.match.id);
                    ids.add(cell.recon.match.id);
                } else {
                    // Unmatched cells keep their position in the preview with null placeholders
                    topicNames.add(null);
                    topicIds.add(null);
                    ids.add(null);
                }
            }
        }
        Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
        ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(config, endpoint);
        Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
        List<List<Object>> rows = new ArrayList<>();
        for (int r = 0; r < topicNames.size(); r++) {
            String id = topicIds.get(r);
            String topicName = topicNames.get(r);
            if (id != null && map.containsKey(id)) {
                DataExtension ext = map.get(id);
                boolean first = true;
                if (ext.data.length > 0) {
                    for (Object[] row : ext.data) {
                        List<Object> jsonRow = new ArrayList<>();
                        // Only the first extension row of a topic carries the topic name
                        if (first) {
                            jsonRow.add(topicName);
                            first = false;
                        } else {
                            jsonRow.add(null);
                        }
                        for (Object cell : row) {
                            jsonRow.add(cell);
                        }
                        rows.add(jsonRow);
                    }
                    continue;
                }
            }
            // No extension data for this row: emit a single-cell row instead
            List<Object> supplement = new ArrayList<>();
            if (id != null) {
                supplement.add(new ReconCandidate(id, topicName, new String[0], 100));
            } else {
                supplement.add("<not reconciled>");
            }
            rows.add(supplement);
        }
        respondJSON(response, new PreviewResponse(job.columns, rows));
    } catch (Exception e) {
        respondException(response, e);
    }
}
Also used : ReconciledDataExtensionJob(com.google.refine.model.recon.ReconciledDataExtensionJob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataExtension(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension) Column(com.google.refine.model.Column) ReconConfig(com.google.refine.model.recon.ReconConfig) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ArrayList(java.util.ArrayList) List(java.util.List) Cell(com.google.refine.model.Cell) ReconCandidate(com.google.refine.model.ReconCandidate) HashSet(java.util.HashSet) DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) ServletException(javax.servlet.ServletException) IOException(java.io.IOException) Project(com.google.refine.model.Project) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Row(com.google.refine.model.Row)

Example 2 with StandardReconConfig

use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.

Method updateReconciledCells of the class NewEntityLibrary.

/**
 * Changes the "new" reconciled cells to their allocated ids for later use.
 *
 * Cells judged as new whose recon id is a key of {@code map} are marked as matched to a
 * candidate built from the mapped id and the cell value. With {@code reset} set, matched
 * cells whose recon id is a key of {@code map} are turned back into new cells. Recon
 * features are recomputed for changed cells in columns using a {@link StandardReconConfig},
 * and the recon statistics of every impacted column are refreshed.
 *
 * @param project
 *            the project whose rows are scanned and updated in place
 * @param reset
 *            set to true to revert the operation (set cells to "new")
 */
public void updateReconciledCells(Project project, boolean reset) {
    Set<Integer> impactedColumns = new HashSet<>();
    for (Row row : project.rows) {
        for (int i = 0; i != row.cells.size(); i++) {
            Cell cell = row.cells.get(i);
            if (cell == null || cell.recon == null) {
                continue;
            }
            Recon recon = cell.recon;
            boolean changed = false;
            if (Recon.Judgment.New.equals(recon.judgment) && !reset && map.containsKey(recon.id)) {
                recon.judgment = Recon.Judgment.Matched;
                recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(), new String[0], 100);
                recon.addCandidate(recon.match);
                changed = true;
            } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset && map.containsKey(recon.id)) {
                recon.judgment = Recon.Judgment.New;
                // Drop the last candidate (the forward branch above appends the match there).
                // The emptiness check avoids remove(-1) on an empty candidate list.
                if (recon.candidates != null && !recon.candidates.isEmpty()) {
                    recon.candidates.remove(recon.candidates.size() - 1);
                }
                recon.match = null;
                changed = true;
            }
            if (changed) {
                impactedColumns.add(i);
                // Compute features
                Column column = project.columnModel.getColumnByCellIndex(i);
                ReconConfig config = column.getReconConfig();
                if (config instanceof StandardReconConfig) {
                    StandardReconConfig stdConfig = (StandardReconConfig) config;
                    if (cell.getValue() instanceof String) {
                        stdConfig.computeFeatures(recon, (String) cell.getValue());
                    }
                }
            }
        }
    }
    // Update reconciliation statistics for impacted columns
    for (Integer colId : impactedColumns) {
        Column column = project.columnModel.getColumnByCellIndex(colId);
        column.setReconStats(ReconStats.create(project, colId));
    }
}
Also used : StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Column(com.google.refine.model.Column) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconConfig(com.google.refine.model.recon.ReconConfig) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell) Recon(com.google.refine.model.Recon) ReconCandidate(com.google.refine.model.ReconCandidate) HashSet(java.util.HashSet)

Example 3 with StandardReconConfig

use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.

Method parseOneFile of the class WikitextImporter.

/**
 * Parses the wikitext read from {@code reader}, extracts its table through a
 * {@link WikitextTableVisitor} and loads it into {@code project} via
 * {@code TabularImportingParserBase.readTable}.
 *
 * Recognised options: "blankSpanningCells" (default true), "includeRawTemplates" (default
 * false), "parseReferences" (default true), "wikiUrl" (default null; when set, cells are
 * reconciled to Qids) and "reconService" (reconciliation endpoint).
 *
 * @param project
 *            the project receiving the imported rows
 * @param metadata
 *            project metadata; its name is set to the table caption when there is one
 * @param job
 *            the importing job, passed on to the tabular reader
 * @param fileSource
 *            not used by this method
 * @param reader
 *            source of the wikitext; read fully, not closed here
 * @param limit
 *            passed on to the tabular reader
 * @param options
 *            importer options, see above
 * @param exceptions
 *            collects the I/O and parse failures encountered
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, Reader reader, int limit, ObjectNode options, List<Exception> exceptions) {
    // Set-up a simple wiki configuration
    ParserConfig parserConfig = new SimpleParserConfig();
    try {
        // Encoding validation
        WikitextEncodingValidator v = new WikitextEncodingValidator();
        String wikitext = CharStreams.toString(reader);
        String title = "Page title";
        ValidatedWikitext validated = v.validate(parserConfig, wikitext, title);
        // Pre-processing
        WikitextPreprocessor prep = new WikitextPreprocessor(parserConfig);
        WtPreproWikitextPage prepArticle = (WtPreproWikitextPage) prep.parseArticle(validated, title, false);
        // Parsing
        PreprocessedWikitext ppw = PreprocessorToParserTransformer.transform(prepArticle);
        WikitextParser parser = new WikitextParser(parserConfig);
        WtParsedWikitextPage parsedArticle = (WtParsedWikitextPage) parser.parseArticle(ppw, title);
        // Compile the retrieved page
        boolean blankSpanningCells = JSONUtilities.getBoolean(options, "blankSpanningCells", true);
        boolean includeRawTemplates = JSONUtilities.getBoolean(options, "includeRawTemplates", false);
        boolean parseReferences = JSONUtilities.getBoolean(options, "parseReferences", true);
        final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates);
        vs.go(parsedArticle);
        WikiTableDataReader dataReader = new WikiTableDataReader(vs, parseReferences);
        // Reconcile if needed
        String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
        // Wikidata reconciliation endpoint, hardcoded because users might not have it in their services
        String reconUrl = JSONUtilities.getString(options, "reconService", "https://wikidata.reconci.link/en/api");
        StandardReconConfig cfg = getReconConfig(reconUrl);
        if (wikiUrl != null) {
            dataReader.reconcileToQids(wikiUrl, cfg);
        }
        // Set metadata
        if (vs.caption != null && vs.caption.length() > 0) {
            // TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
            metadata.setName(vs.caption);
        }
        TabularImportingParserBase.readTable(project, job, dataReader, limit, options, exceptions);
        // Add reconciliation statistics
        if (dataReader.columnReconciled != null) {
            for (int i = 0; i != dataReader.columnReconciled.size(); i++) {
                if (dataReader.columnReconciled.get(i)) {
                    Column col = project.columnModel.columns.get(i);
                    col.setReconStats(ReconStats.create(project, i));
                    col.setReconConfig(cfg);
                }
            }
        }
    } catch (IOException | ParseException e1) {
        // Report read failures to the caller as well as parse failures, instead of only
        // printing them: an unreadable source should not look like a successful import.
        exceptions.add(e1);
        e1.printStackTrace();
    }
}
Also used : WtParsedWikitextPage(org.sweble.wikitext.parser.nodes.WtParsedWikitextPage) WikitextEncodingValidator(org.sweble.wikitext.parser.WikitextEncodingValidator) SimpleParserConfig(org.sweble.wikitext.parser.utils.SimpleParserConfig) IOException(java.io.IOException) PreprocessedWikitext(org.sweble.wikitext.parser.preprocessor.PreprocessedWikitext) ValidatedWikitext(org.sweble.wikitext.parser.encval.ValidatedWikitext) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Column(com.google.refine.model.Column) WikitextPreprocessor(org.sweble.wikitext.parser.WikitextPreprocessor) WtPreproWikitextPage(org.sweble.wikitext.parser.nodes.WtPreproWikitextPage) ParseException(xtc.parser.ParseException) WikitextParser(org.sweble.wikitext.parser.WikitextParser) ParserConfig(org.sweble.wikitext.parser.ParserConfig) SimpleParserConfig(org.sweble.wikitext.parser.utils.SimpleParserConfig)

Example 4 with StandardReconConfig

use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.

Method testUpdateReconciledCells of the class NewEntityLibraryTest.

/**
 * Checks that updateReconciledCells turns "new" cells into matched cells (computing their
 * recon features) and that calling it with reset restores the "new" judgments, while an
 * already matched cell is left untouched in both directions.
 */
@Test
public void testUpdateReconciledCells() {
    Project project = createCSVProject(TestingData.inceptionWithNewCsv);
    StandardReconConfig reconConfig = new StandardReconConfig("http://my.endpoint", "http://my.schema", "http://my.schema", "Q5", "human", true, Collections.emptyList());
    project.columnModel.columns.get(0).setReconConfig(reconConfig);

    // Seed the first column: two new items around one existing match
    project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
    project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
    project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));

    Cell ljubljana = project.rows.get(0).cells.get(0);
    Cell warwick = project.rows.get(1).cells.get(0);
    Cell newUni = project.rows.get(2).cells.get(0);
    isNewTo(3289L, ljubljana);
    isMatchedTo("Q865528", warwick);
    isNewTo(1234L, newUni);

    // Forward: new cells become matched to their allocated ids
    library.updateReconciledCells(project, false);
    ljubljana = project.rows.get(0).cells.get(0);
    warwick = project.rows.get(1).cells.get(0);
    newUni = project.rows.get(2).cells.get(0);
    isMatchedTo("Q384", ljubljana);
    assertTrue((Boolean) ljubljana.recon.getFeature(Recon.Feature_nameMatch));
    isMatchedTo("Q865528", warwick);
    isMatchedTo("Q345", newUni);
    assertTrue(newUni.recon.getFeature(Recon.Feature_nameLevenshtein).equals(0));

    // Reset: the previously new cells go back to "new"
    library.updateReconciledCells(project, true);
    ljubljana = project.rows.get(0).cells.get(0);
    warwick = project.rows.get(1).cells.get(0);
    newUni = project.rows.get(2).cells.get(0);
    isNewTo(3289L, ljubljana);
    isMatchedTo("Q865528", warwick);
    isNewTo(1234L, newUni);
}
Also used : Project(com.google.refine.model.Project) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Cell(com.google.refine.model.Cell) JacksonSerializationTest(org.openrefine.wikidata.testing.JacksonSerializationTest) WikidataRefineTest(org.openrefine.wikidata.testing.WikidataRefineTest) Test(org.testng.annotations.Test)

Example 5 with StandardReconConfig

use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.

Method setUp of the class ReconJudgeOneCellCommandTest.

/**
 * Creates a two-column project whose first column carries a standard recon config, mocks
 * the servlet request/response (project id, fresh CSRF token, response writer) and
 * instantiates the command under test.
 */
@BeforeMethod
public void setUp() {
    // Project fixture: only the first column is given a recon config
    project = createCSVProject("reconciled column,unreconciled column\n" + "a,b\n" + "c,d\n");
    ReconConfig standardConfig = new StandardReconConfig("http://my.recon.service/api", "http://my.recon.service/rdf/space", "http://my.recon.service/rdf/schema", "type3894", "octopus", true, Collections.emptyList(), 5);
    Column firstColumn = project.columnModel.columns.get(0);
    firstColumn.setReconConfig(standardConfig);

    // Servlet mocks
    request = mock(HttpServletRequest.class);
    response = mock(HttpServletResponse.class);
    writer = mock(PrintWriter.class);

    String projectId = String.valueOf(project.id);
    when(request.getParameter("project")).thenReturn(projectId);
    String csrfToken = Command.csrfFactory.getFreshToken();
    when(request.getParameter("csrf_token")).thenReturn(csrfToken);
    try {
        when(response.getWriter()).thenReturn(writer);
    } catch (IOException e1) {
        // getWriter() declares IOException; it cannot legitimately happen on a mock
        Assert.fail();
    }

    command = new ReconJudgeOneCellCommand();
}
Also used : HttpServletRequest(javax.servlet.http.HttpServletRequest) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Column(com.google.refine.model.Column) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconConfig(com.google.refine.model.recon.ReconConfig) HttpServletResponse(javax.servlet.http.HttpServletResponse) ReconJudgeOneCellCommand(com.google.refine.commands.recon.ReconJudgeOneCellCommand) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) BeforeMethod(org.testng.annotations.BeforeMethod)

Aggregations

StandardReconConfig (com.google.refine.model.recon.StandardReconConfig)7 Column (com.google.refine.model.Column)6 Cell (com.google.refine.model.Cell)4 Project (com.google.refine.model.Project)4 ReconConfig (com.google.refine.model.recon.ReconConfig)4 IOException (java.io.IOException)3 Test (org.testng.annotations.Test)3 RefineTest (com.google.refine.RefineTest)2 Recon (com.google.refine.model.Recon)2 ReconCandidate (com.google.refine.model.ReconCandidate)2 Row (com.google.refine.model.Row)2 Process (com.google.refine.process.Process)2 HashSet (java.util.HashSet)2 Properties (java.util.Properties)2 BeforeTest (org.testng.annotations.BeforeTest)2 ReconJudgeOneCellCommand (com.google.refine.commands.recon.ReconJudgeOneCellCommand)1 AbstractOperation (com.google.refine.model.AbstractOperation)1 ReconJob (com.google.refine.model.recon.ReconJob)1 ReconciledDataExtensionJob (com.google.refine.model.recon.ReconciledDataExtensionJob)1 DataExtension (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension)1