Search in sources :

Example 6 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

From the class ReconJudgeSimilarCellsTests, method testMarkNewTopics.

/**
 * Marks every cell of column "A" whose value is "foo" as a new topic (with topic sharing
 * enabled) and checks that the judged cell picks up the identifier space configured on the
 * column, while the non-matching cell in the next row is left without a recon.
 */
@Test
public void testMarkNewTopics() throws Exception {
    // Two data rows; only the first one holds the value targeted by the operation.
    Project project = createCSVProject("A,B\n" + "foo,bar\n" + "alpha,beta\n");

    // Attach a reconciliation configuration to the first column ("A").
    Column firstColumn = project.columnModel.columns.get(0);
    ReconConfig reconConfig = new StandardReconConfig(
            "http://my.database/recon_service",
            "http://my.database/entity/",
            "http://my.database/schema/",
            null,
            null,
            true,
            Collections.emptyList());
    firstColumn.setReconConfig(reconConfig);

    // Run the operation synchronously.
    AbstractOperation operation = new ReconJudgeSimilarCellsOperation(
            ENGINE_CONFIG, "A", "foo", Recon.Judgment.New, null, true);
    Process running = operation.createProcess(project, new Properties());
    running.performImmediate();

    // The "foo" cell is judged as new and inherits the column's identifier space.
    Cell judgedCell = project.rows.get(0).cells.get(0);
    assertEquals(Recon.Judgment.New, judgedCell.recon.judgment);
    assertEquals("http://my.database/entity/", judgedCell.recon.identifierSpace);

    // The "alpha" cell does not match the similar value and must stay unreconciled.
    Cell untouchedCell = project.rows.get(1).cells.get(0);
    assertNull(untouchedCell.recon);
}
Also used : Project(com.google.refine.model.Project) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconJudgeSimilarCellsOperation(com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation) AbstractOperation(com.google.refine.model.AbstractOperation) Column(com.google.refine.model.Column) StandardReconConfig(com.google.refine.model.recon.StandardReconConfig) ReconConfig(com.google.refine.model.recon.ReconConfig) Process(com.google.refine.process.Process) Properties(java.util.Properties) Cell(com.google.refine.model.Cell) RefineTest(com.google.refine.RefineTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 7 with ReconConfig

use of com.google.refine.model.recon.ReconConfig in project OpenRefine by OpenRefine.

From the class ReconJudgeSimilarCellsOperation, method createRowVisitor.

/**
 * Creates the visitor that applies this operation's judgment to every cell of the target
 * column whose (stringified) value equals {@code _similarValue}. For each such cell a
 * {@link CellChange} replacing it with a re-judged copy is appended to {@code cellChanges}.
 *
 * @param project        the project whose column model is used to resolve {@code _columnName}
 * @param cellChanges    output list that receives one change per affected cell
 * @param historyEntryID id stamped on every recon created or duplicated by the visitor
 * @return the configured row visitor
 * @throws IllegalArgumentException if the project has no column named {@code _columnName}
 */
@Override
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    if (column == null) {
        // Fail with an explicit message instead of a bare NullPointerException below.
        throw new IllegalArgumentException("No column named " + _columnName);
    }
    ReconConfig reconConfig = column.getReconConfig();
    return new RowVisitor() {

        int _cellIndex;

        List<CellChange> _cellChanges;

        // Single recon shared by all matching cells when judging as New with shared topics.
        Recon _sharedNewRecon = null;

        // Maps the id of an original recon to its re-judged duplicate, so that cells which
        // shared a recon before the operation keep sharing one afterwards.
        Map<Long, Recon> _dupReconMap = new HashMap<Long, Recon>();

        long _historyEntryID;

        public RowVisitor init(int cellIndex, List<CellChange> cellChanges, long historyEntryID) {
            _cellIndex = cellIndex;
            _cellChanges = cellChanges;
            _historyEntryID = historyEntryID;
            return this;
        }

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        /**
         * Re-judges the cell of {@code row} in the target column if its value matches.
         * Always returns {@code false} (presumably "do not abort the traversal" - confirm
         * against the RowVisitor contract).
         */
        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(_cellIndex);
            if (cell == null || !ExpressionUtils.isNonBlankData(cell.value)) {
                return false;
            }
            String value = cell.value instanceof String ? ((String) cell.value) : cell.value.toString();
            if (!_similarValue.equals(value)) {
                return false;
            }

            Recon recon;
            if (_judgment == Judgment.New && _shareNewTopics) {
                if (_sharedNewRecon == null) {
                    _sharedNewRecon = createFreshRecon();
                    _sharedNewRecon.judgment = Judgment.New;
                    _sharedNewRecon.judgmentBatchSize = 0;
                    _sharedNewRecon.judgmentAction = "similar";
                }
                _sharedNewRecon.judgmentBatchSize++;
                recon = _sharedNewRecon;
            } else if (cell.recon == null) {
                // The cell was never reconciled, so there is no recon to duplicate.
                // Previously this path dereferenced cell.recon and threw a
                // NullPointerException.
                if (_judgment == Judgment.None) {
                    // Nothing to reset on an unreconciled cell: leave it untouched.
                    return false;
                }
                recon = createFreshRecon();
                recon.judgmentBatchSize = 1;
                applyJudgment(recon);
            } else {
                recon = _dupReconMap.get(cell.recon.id);
                if (recon != null) {
                    // Another cell sharing the same original recon was already re-judged.
                    recon.judgmentBatchSize++;
                } else {
                    recon = cell.recon.dup(_historyEntryID);
                    recon.judgmentBatchSize = 1;
                    applyJudgment(recon);
                    _dupReconMap.put(cell.recon.id, recon);
                }
            }

            Cell newCell = new Cell(cell.value, recon);
            CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
            _cellChanges.add(cellChange);
            return false;
        }

        /** Creates a brand-new recon for a cell that has none to duplicate. */
        private Recon createFreshRecon() {
            if (reconConfig != null) {
                return reconConfig.createNewRecon(_historyEntryID);
            }
            // This should only happen if we are creating new cells
            // in a column that has not been reconciled before.
            // In that case, we do not know which reconciliation service
            // to use, so we fall back on the default one.
            return new Recon(_historyEntryID, null, null);
        }

        /** Stamps the operation's judgment (and match, if any) onto {@code recon}. */
        private void applyJudgment(Recon recon) {
            recon.matchRank = -1;
            recon.judgmentAction = "similar";
            if (_judgment == Judgment.Matched) {
                recon.judgment = Judgment.Matched;
                recon.match = _match;
                if (recon.candidates != null) {
                    // Record the position of the chosen match among the candidates, if present.
                    for (int m = 0; m < recon.candidates.size(); m++) {
                        if (recon.candidates.get(m).id.equals(_match.id)) {
                            recon.matchRank = m;
                            break;
                        }
                    }
                }
            } else if (_judgment == Judgment.New) {
                recon.judgment = Judgment.New;
                recon.match = null;
            } else if (_judgment == Judgment.None) {
                recon.judgment = Judgment.None;
                recon.match = null;
            }
        }
    }.init(column.getCellIndex(), cellChanges, historyEntryID);
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) ReconConfig(com.google.refine.model.recon.ReconConfig) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Recon(com.google.refine.model.Recon) Cell(com.google.refine.model.Cell)

Aggregations

ReconConfig (com.google.refine.model.recon.ReconConfig)7 Column (com.google.refine.model.Column)6 Cell (com.google.refine.model.Cell)5 Project (com.google.refine.model.Project)4 Row (com.google.refine.model.Row)4 StandardReconConfig (com.google.refine.model.recon.StandardReconConfig)4 Recon (com.google.refine.model.Recon)3 HashMap (java.util.HashMap)3 RowVisitor (com.google.refine.browsing.RowVisitor)2 ReconCandidate (com.google.refine.model.ReconCandidate)2 CellChange (com.google.refine.model.changes.CellChange)2 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 RefineTest (com.google.refine.RefineTest)1 ReconJudgeOneCellCommand (com.google.refine.commands.recon.ReconJudgeOneCellCommand)1 AbstractOperation (com.google.refine.model.AbstractOperation)1 ReconStats (com.google.refine.model.ReconStats)1 ReconciledDataExtensionJob (com.google.refine.model.recon.ReconciledDataExtensionJob)1 DataExtension (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension)1 DataExtensionConfig (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig)1