Search in sources:

Example 1 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

the class PreviewExtendDataCommand method doPost.

/**
 * Builds a preview of a data extension for a set of rows of a reconciled column.
 *
 * Reads the request parameters "columnName", "rowIndices" (a JSON array of row indices) and
 * "extension" (the serialized extension configuration), fetches the extension data for the
 * matched entities of the selected rows, and responds with a {@code PreviewResponse}.
 * Requests without a valid CSRF token, without row indices, with an unknown column, or with
 * a column whose recon config is not a {@code StandardReconConfig} get an error response.
 *
 * @param request
 *            the HTTP request carrying the parameters listed above
 * @param response
 *            the HTTP response the JSON result or error is written to
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!hasValidCSRFToken(request)) {
        respondCSRFError(response);
        return;
    }
    try {
        Project project = getProject(request);
        String columnName = request.getParameter("columnName");
        String rowIndicesString = request.getParameter("rowIndices");
        if (rowIndicesString == null) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
            return;
        }
        String jsonString = request.getParameter("extension");
        DataExtensionConfig config = DataExtensionConfig.reconstruct(jsonString);
        List<Integer> rowIndices = ParsingUtilities.mapper.readValue(rowIndicesString, new TypeReference<List<Integer>>() {
        });
        Column column = project.columnModel.getColumnByName(columnName);
        if (column == null) {
            // Report an unknown column explicitly instead of failing with a NullPointerException below.
            respond(response, "{ \"code\" : \"error\", \"message\" : \"The specified column does not exist\" }");
            return;
        }
        int cellIndex = column.getCellIndex();
        // get the endpoint to extract data from
        ReconConfig cfg = column.getReconConfig();
        if (!(cfg instanceof StandardReconConfig)) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
            return;
        }
        String endpoint = ((StandardReconConfig) cfg).service;

        // topicNames and topicIds are parallel lists with one entry per valid requested row;
        // ids is the de-duplicated set handed to the extension job.
        List<String> topicNames = new ArrayList<String>();
        List<String> topicIds = new ArrayList<String>();
        Set<String> ids = new HashSet<String>();
        for (Integer rowIndex : rowIndices) {
            // A JSON null in the index array cannot be unboxed; skip it like an out-of-range index.
            if (rowIndex == null || rowIndex < 0 || rowIndex >= project.rows.size()) {
                continue;
            }
            Row row = project.rows.get(rowIndex);
            Cell cell = row.getCell(cellIndex);
            if (cell != null && cell.recon != null && cell.recon.match != null) {
                topicNames.add(cell.recon.match.name);
                topicIds.add(cell.recon.match.id);
                ids.add(cell.recon.match.id);
            } else {
                topicNames.add(null);
                topicIds.add(null);
                // NOTE(review): a null id is passed on to the job as before — confirm the job ignores it.
                ids.add(null);
            }
        }

        Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
        ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(config, endpoint);
        Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);

        List<List<Object>> rows = new ArrayList<>();
        for (int r = 0; r < topicNames.size(); r++) {
            String id = topicIds.get(r);
            String topicName = topicNames.get(r);
            DataExtension ext = id != null ? map.get(id) : null;
            if (ext != null && ext.data.length > 0) {
                // Only the first row of an entity's extension data carries the topic name.
                boolean first = true;
                for (Object[] row : ext.data) {
                    List<Object> jsonRow = new ArrayList<>();
                    jsonRow.add(first ? topicName : null);
                    first = false;
                    for (Object cell : row) {
                        jsonRow.add(cell);
                    }
                    rows.add(jsonRow);
                }
            } else {
                // No extension data: emit a single placeholder row for this topic.
                List<Object> supplement = new ArrayList<>();
                if (id != null) {
                    supplement.add(new ReconCandidate(id, topicName, new String[0], 100));
                } else {
                    supplement.add("<not reconciled>");
                }
                rows.add(supplement);
            }
        }
        respondJSON(response, new PreviewResponse(job.columns, rows));
    } catch (Exception e) {
        respondException(response, e);
    }
}
Also used : ReconciledDataExtensionJob(com.google.refine.model.recon.ReconciledDataExtensionJob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataExtension(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension) Column(com.google.refine.model.Column) ReconConfig(com.google.refine.model.recon.ReconConfig) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ArrayList(java.util.ArrayList) List(java.util.List) Cell(com.google.refine.model.Cell) ReconCandidate(com.google.refine.model.ReconCandidate) HashSet(java.util.HashSet) DataExtensionConfig(com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) ServletException(javax.servlet.ServletException) IOException(java.io.IOException) Project(com.google.refine.model.Project) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Row(com.google.refine.model.Row)

Example 2 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

the class NewEntityLibrary method updateReconciledCells.

/**
 * Changes the "new" reconciled cells to their allocated ids for later use.
 *
 * Only cells whose recon id is a key of {@code map} are touched. Afterwards the reconciliation
 * statistics of every column containing a modified cell are recomputed.
 *
 * @param project
 *            the project whose cells are updated in place
 * @param reset
 *            set to true to revert the operation (set cells to "new")
 */
public void updateReconciledCells(Project project, boolean reset) {
    Set<Integer> impactedColumns = new HashSet<>();
    for (Row row : project.rows) {
        for (int i = 0; i != row.cells.size(); i++) {
            Cell cell = row.cells.get(i);
            if (cell == null || cell.recon == null) {
                continue;
            }
            Recon recon = cell.recon;
            // Both transitions below only apply to recons that have an allocated id.
            if (!map.containsKey(recon.id)) {
                continue;
            }
            boolean changed = false;
            if (!reset && Recon.Judgment.New.equals(recon.judgment)) {
                recon.judgment = Recon.Judgment.Matched;
                recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(), new String[0], 100);
                recon.addCandidate(recon.match);
                changed = true;
            } else if (reset && Recon.Judgment.Matched.equals(recon.judgment)) {
                recon.judgment = Recon.Judgment.New;
                // Drop the last candidate; guard against an empty list, where
                // remove(size() - 1) would throw IndexOutOfBoundsException.
                if (recon.candidates != null && !recon.candidates.isEmpty()) {
                    recon.candidates.remove(recon.candidates.size() - 1);
                }
                recon.match = null;
                changed = true;
            }
            if (changed) {
                impactedColumns.add(i);
                // Compute features
                Column column = project.columnModel.getColumnByCellIndex(i);
                ReconConfig config = column == null ? null : column.getReconConfig();
                if (config instanceof StandardReconConfig && cell.getValue() instanceof String) {
                    ((StandardReconConfig) config).computeFeatures(recon, (String) cell.getValue());
                }
            }
        }
    }
    // Update reconciliation statistics for impacted columns
    for (Integer colId : impactedColumns) {
        Column column = project.columnModel.getColumnByCellIndex(colId);
        if (column != null) {
            column.setReconStats(ReconStats.create(project, colId));
        }
    }
}
Also used : StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Column(com.google.refine.model.Column) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconConfig(com.google.refine.model.recon.ReconConfig) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell) Recon(com.google.refine.model.Recon) ReconCandidate(com.google.refine.model.ReconCandidate) HashSet(java.util.HashSet)

Example 3 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

the class ReconChange method load.

/**
 * Reads a serialized {@code ReconChange} from the given reader.
 *
 * Consumes {@code field=value} lines until the end of input or a line equal to "/ec/".
 * Recognized fields are "newReconConfig", "newReconStats", "oldReconConfig", "oldReconStats",
 * "commonColumnName" and "cellChangeCount"; the latter is followed by that many serialized
 * cell changes, each read with {@code CellChange.load}. Unrecognized fields are ignored.
 *
 * @param reader
 *            the reader positioned at the first field line of the change
 * @param pool
 *            the pool passed through to {@code CellChange.load}
 * @return the reconstructed change
 * @throws IllegalArgumentException
 *             if a line contains no '=' separator
 * @throws Exception
 *             if reading or parsing any field fails
 */
public static Change load(LineNumberReader reader, Pool pool) throws Exception {
    ReconConfig newReconConfig = null;
    ReconStats newReconStats = null;
    ReconConfig oldReconConfig = null;
    ReconStats oldReconStats = null;
    String commonColumnName = null;
    CellChange[] cellChanges = null;
    String line;
    while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
        int equal = line.indexOf('=');
        if (equal < 0) {
            // Without this check substring(0, -1) fails with an index error that gives no hint
            // about which line of the serialized change is broken.
            throw new IllegalArgumentException(
                    "Malformed ReconChange line " + reader.getLineNumber() + " (missing '='): " + line);
        }
        String field = line.substring(0, equal);
        String value = line.substring(equal + 1);
        switch (field) {
            case "newReconConfig":
                if (!value.isEmpty()) {
                    newReconConfig = ReconConfig.reconstruct(value);
                }
                break;
            case "newReconStats":
                if (!value.isEmpty()) {
                    newReconStats = ParsingUtilities.mapper.readValue(value, ReconStats.class);
                }
                break;
            case "oldReconConfig":
                if (!value.isEmpty()) {
                    oldReconConfig = ReconConfig.reconstruct(value);
                }
                break;
            case "oldReconStats":
                if (!value.isEmpty()) {
                    oldReconStats = ParsingUtilities.mapper.readValue(value, ReconStats.class);
                }
                break;
            case "commonColumnName":
                commonColumnName = value;
                break;
            case "cellChangeCount": {
                int cellChangeCount = Integer.parseInt(value);
                cellChanges = new CellChange[cellChangeCount];
                for (int i = 0; i < cellChangeCount; i++) {
                    cellChanges[i] = CellChange.load(reader, pool);
                }
                break;
            }
            default:
                // unknown fields are ignored
                break;
        }
    }
    ReconChange change = new ReconChange(cellChanges, commonColumnName, newReconConfig, newReconStats);
    change._oldReconConfig = oldReconConfig;
    change._oldReconStats = oldReconStats;
    return change;
}
Also used : ReconConfig(com.google.refine.model.recon.ReconConfig) ReconStats(com.google.refine.model.ReconStats)

Example 4 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

the class ReconMarkNewTopicsOperation method createRowVisitor.

/**
 * Creates the visitor that marks the cells of the configured column as new topics.
 *
 * For every visited row with a non-null cell in that column, a {@code CellChange} replacing
 * the cell by one with a recon judged {@code New} is appended to {@code cellChanges}.
 * When {@code _shareNewTopics} is set, cells with the same string value (a null value counts
 * as the empty string) share a single recon whose batch size is incremented on each reuse;
 * otherwise each cell gets a copy of its own recon, or a fresh one, with its match cleared.
 *
 * @param project
 *            the project holding the column named by {@code _columnName}
 * @param cellChanges
 *            the list the generated cell changes are appended to
 * @param historyEntryID
 *            the history entry id given to newly created or duplicated recons
 * @return the row visitor; its {@code visit} method always returns false
 */
@Override
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    ReconConfig reconConfig = column.getReconConfig();
    int targetCellIndex = column.getCellIndex();

    return new RowVisitor() {

        // Recons already handed out, keyed by the string form of the cell value.
        private final Map<String, Recon> reconsByValue = new HashMap<>();

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        private Recon createNewRecon() {
            if (reconConfig == null) {
                // we just resort to the default reconciliation space.
                return new Recon(historyEntryID, null, null);
            }
            return reconConfig.createNewRecon(historyEntryID);
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell oldCell = row.getCell(targetCellIndex);
            if (oldCell == null) {
                return false;
            }

            Recon newRecon;
            if (!_shareNewTopics) {
                if (oldCell.recon != null) {
                    newRecon = oldCell.recon.dup(historyEntryID);
                } else {
                    newRecon = createNewRecon();
                }
                newRecon.match = null;
                newRecon.matchRank = -1;
                newRecon.judgment = Judgment.New;
                newRecon.judgmentBatchSize = 1;
                newRecon.judgmentAction = "mass";
            } else {
                String key = oldCell.value != null ? oldCell.value.toString() : "";
                if (!reconsByValue.containsKey(key)) {
                    newRecon = createNewRecon();
                    newRecon.judgment = Judgment.New;
                    newRecon.judgmentBatchSize = 1;
                    newRecon.judgmentAction = "mass";
                    reconsByValue.put(key, newRecon);
                } else {
                    newRecon = reconsByValue.get(key);
                    newRecon.judgmentBatchSize++;
                }
            }

            cellChanges.add(new CellChange(rowIndex, targetCellIndex, oldCell, new Cell(oldCell.value, newRecon)));
            return false;
        }
    };
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) ReconConfig(com.google.refine.model.recon.ReconConfig) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Recon(com.google.refine.model.Recon) Cell(com.google.refine.model.Cell)

Example 5 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

the class ReconJudgeOneCellCommandTest method setUp.

/**
 * Prepares a two-column project whose first column carries a standard recon config, plus
 * mocked request/response objects wired with the project id, a fresh CSRF token and a mocked
 * writer, and the command under test.
 */
@BeforeMethod
public void setUp() {
    project = createCSVProject("reconciled column,unreconciled column\n" + "a,b\n" + "c,d\n");
    Column reconciled = project.columnModel.columns.get(0);
    ReconConfig config = new StandardReconConfig("http://my.recon.service/api", "http://my.recon.service/rdf/space", "http://my.recon.service/rdf/schema", "type3894", "octopus", true, Collections.emptyList(), 5);
    reconciled.setReconConfig(config);
    request = mock(HttpServletRequest.class);
    response = mock(HttpServletResponse.class);
    when(request.getParameter("project")).thenReturn(String.valueOf(project.id));
    when(request.getParameter("csrf_token")).thenReturn(Command.csrfFactory.getFreshToken());
    writer = mock(PrintWriter.class);
    try {
        when(response.getWriter()).thenReturn(writer);
    } catch (IOException e1) {
        // Keep the cause so a failure here is diagnosable from the test report.
        Assert.fail("Could not stub response.getWriter()", e1);
    }
    command = new ReconJudgeOneCellCommand();
}
Also used : HttpServletRequest(javax.servlet.http.HttpServletRequest) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) Column(com.google.refine.model.Column) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconConfig(com.google.refine.model.recon.ReconConfig) HttpServletResponse(javax.servlet.http.HttpServletResponse) ReconJudgeOneCellCommand(com.google.refine.commands.recon.ReconJudgeOneCellCommand) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) BeforeMethod(org.testng.annotations.BeforeMethod)

Aggregations

ReconConfig (com.google.refine.model.recon.ReconConfig) 7, Column (com.google.refine.model.Column) 6, Cell (com.google.refine.model.Cell) 5, Project (com.google.refine.model.Project) 4, Row (com.google.refine.model.Row) 4, StandardReconConfig (com.google.refine.model.recon.StandardReconConfig) 4, Recon (com.google.refine.model.Recon) 3, HashMap (java.util.HashMap) 3, RowVisitor (com.google.refine.browsing.RowVisitor) 2, ReconCandidate (com.google.refine.model.ReconCandidate) 2, CellChange (com.google.refine.model.changes.CellChange) 2, IOException (java.io.IOException) 2, HashSet (java.util.HashSet) 2, RefineTest (com.google.refine.RefineTest) 1, ReconJudgeOneCellCommand (com.google.refine.commands.recon.ReconJudgeOneCellCommand) 1, AbstractOperation (com.google.refine.model.AbstractOperation) 1, ReconStats (com.google.refine.model.ReconStats) 1, ReconciledDataExtensionJob (com.google.refine.model.recon.ReconciledDataExtensionJob) 1, DataExtension (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension) 1, DataExtensionConfig (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig) 1