Search in sources:

Example 6 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

the class ColumnAdditionOperation method createRowVisitor.

/**
 * Builds the row visitor that evaluates this operation's expression for each visited row,
 * binding the base column's cell, and collects the resulting new cells.
 *
 * <p>For every row the visitor appends at most one {@code CellAtRow} to {@code cellsAtRows}:
 * nothing is appended when the expression yields {@code null}, when the result wraps to
 * {@code null}, or when the result is an error and the error policy is
 * {@code OnError.SetToBlank}.
 *
 * @param project     project whose column model is searched for the base column and from which
 *                    the expression bindings are created
 * @param cellsAtRows output list that receives one entry per row that produced a new cell
 * @return a visitor whose {@code visit} always returns {@code false}
 * @throws IllegalArgumentException if the project has no column named {@code _baseColumnName}
 * @throws Exception as declared by the signature; presumably raised when the expression
 *                   cannot be parsed (verify against {@code MetaParser.parse})
 */
protected RowVisitor createRowVisitor(Project project, final List<CellAtRow> cellsAtRows) throws Exception {
    Column column = project.columnModel.getColumnByName(_baseColumnName);
    if (column == null) {
        // Fail with a meaningful message instead of a NullPointerException further down.
        throw new IllegalArgumentException("No column named " + _baseColumnName);
    }
    final int cellIndex = column.getCellIndex();
    final Evaluable eval = MetaParser.parse(_expression);
    final Properties bindings = ExpressionUtils.createBindings(project);

    // The visitor captures the final locals above directly; no init() plumbing is needed.
    return new RowVisitor() {

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(cellIndex);
            ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell);

            Object o = eval.evaluate(bindings);
            if (o == null) {
                return false;
            }

            Cell newCell = null;
            if (o instanceof Cell) {
                newCell = (Cell) o;
            } else if (o instanceof WrappedCell) {
                newCell = ((WrappedCell) o).cell;
            } else {
                Serializable v = ExpressionUtils.wrapStorable(o);
                if (ExpressionUtils.isError(v)) {
                    if (_onError == OnError.SetToBlank) {
                        // Leave the new column blank for this row.
                        return false;
                    }
                    if (_onError == OnError.KeepOriginal) {
                        // Fall back to the base cell's value (null when the row has no base cell).
                        v = cell != null ? cell.value : null;
                    }
                    // Any other policy keeps the error value itself so it is stored in the new cell.
                }
                if (v != null) {
                    newCell = new Cell(v, null);
                }
            }

            if (newCell != null) {
                cellsAtRows.add(new CellAtRow(rowIndex, newCell));
            }
            return false;
        }
    };
}
Also used : Serializable(java.io.Serializable) Properties(java.util.Properties) Evaluable(com.google.refine.expr.Evaluable) Project(com.google.refine.model.Project) CellAtRow(com.google.refine.model.changes.CellAtRow) WrappedCell(com.google.refine.expr.WrappedCell) Column(com.google.refine.model.Column) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) CellAtRow(com.google.refine.model.changes.CellAtRow) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) WrappedCell(com.google.refine.expr.WrappedCell)

Example 7 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

the class FillDownOperation method createRowVisitor.

/**
 * Creates the visitor that implements fill-down on the column named {@code _columnName}.
 *
 * <p>While visiting rows, the visitor remembers the most recent cell whose value is non-blank
 * data. For each later row whose value in that column is not non-blank data, it records a
 * {@code CellChange} that replaces the row's cell with the remembered one. Rows visited before
 * any non-blank cell has been seen produce no change.
 *
 * @param project        project whose column model is used to resolve the column
 * @param cellChanges    output list that receives the recorded changes
 * @param historyEntryID not used by this implementation
 * @return a visitor whose {@code visit} always returns {@code false}
 * @throws Exception as declared by the signature
 */
@Override
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID) throws Exception {
    final int targetIndex = project.columnModel.getColumnByName(_columnName).getCellIndex();
    final List<CellChange> collected = cellChanges;

    return new RowVisitor() {

        // Most recent non-blank cell seen so far; null until one has been visited.
        private Cell lastFilledCell;

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            if (!ExpressionUtils.isNonBlankData(row.getCellValue(targetIndex))) {
                // Blank here: fill it from the last non-blank cell, if there has been one.
                if (lastFilledCell != null) {
                    collected.add(new CellChange(rowIndex, targetIndex, row.getCell(targetIndex), lastFilledCell));
                }
                return false;
            }

            // Non-blank: this cell becomes the fill source for the rows that follow.
            lastFilledCell = row.getCell(targetIndex);
            return false;
        }
    };
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 8 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

the class KeyValueColumnizeOperation method createHistoryEntry.

/**
 * Builds the history entry for a key/value "columnize" of the project.
 *
 * <p>Each distinct non-blank string found in the key column gets a newly allocated column, and
 * the value-column cell of that row is placed under it. When a note column is configured,
 * non-blank note values are carried into per-key note columns named
 * {@code "<note column name> : <key>"}. Columns other than key/value/note are kept as-is.
 *
 * <p>The result wraps a {@code MassRowColumnChange} over the new column list (unchanged
 * columns, then key columns, then note columns) and the rebuilt rows with empty rows removed.
 *
 * <p>NOTE(review): {@code keyColumn} and {@code valueColumn} are dereferenced without a null
 * check; confirm that callers validate the column names before this method runs.
 *
 * @param project        project whose {@code columnModel} and {@code rows} are read; new cell
 *                       indices are allocated from its column model
 * @param historyEntryID identifier passed through to the {@code HistoryEntry}
 * @return the history entry describing the change
 * @throws Exception as declared by the signature
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
    int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
    // The note column is optional; -1 never equals a loop index below, so nothing is excluded for it.
    int noteColumnIndex = _noteColumnName == null ? -1 : project.columnModel.getColumnIndexByName(_noteColumnName);
    Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
    Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
    Column noteColumn = _noteColumnName == null ? null : project.columnModel.getColumnByName(_noteColumnName);
    // Every column that is not the key, value or note column is carried over unchanged.
    List<Column> unchangedColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    for (int i = 0; i < oldColumns.size(); i++) {
        if (i != keyColumnIndex && i != valueColumnIndex && i != noteColumnIndex) {
            unchangedColumns.add(oldColumns.get(i));
        }
    }
    // Columns created for keys and for notes, in order of first appearance, plus key -> column lookups.
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> newNoteColumns = new ArrayList<Column>();
    Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
    Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
    // Maps the concatenated unchanged-column values of a row to the output row that collects them.
    Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
    List<Row> newRows = new ArrayList<Row>();
    List<Row> oldRows = project.rows;
    // Output row currently being filled; reassigned as new records/groups start.
    Row reusableRow = null;
    // Output rows belonging to the current record (used only when there are no unchanged columns).
    List<Row> currentRows = new ArrayList<Row>();
    // key which indicates the start of a record
    String recordKey = null;
    if (unchangedColumns.isEmpty()) {
        // Key/value-only input: seed the output with one initial row.
        reusableRow = new Row(1);
        newRows.add(reusableRow);
        currentRows.clear();
        currentRows.add(reusableRow);
    }
    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Object key = oldRow.getCellValue(keyColumn.getCellIndex());
        if (!ExpressionUtils.isNonBlankData(key)) {
            if (unchangedColumns.isEmpty()) {
                // For degenerate 2 column case (plus optional note column),
                // start a new row when we hit a blank line
                reusableRow = new Row(newColumns.size());
                newRows.add(reusableRow);
                currentRows.clear();
                currentRows.add(reusableRow);
            } else {
                // Copy rows with no key
                // (buildNewRow is defined elsewhere; presumably it copies the unchanged columns' cells - verify)
                newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
            }
            continue;
        }
        String keyString = key.toString();
        // TODO: Add support for processing in record mode instead of just by rows
        // Start a new output row on the very first key, and whenever the record key repeats.
        if (keyString.equals(recordKey) || recordKey == null) {
            reusableRow = new Row(newColumns.size());
            newRows.add(reusableRow);
            currentRows.clear();
            currentRows.add(reusableRow);
        }
        Column newColumn = keyValueToColumn.get(keyString);
        if (newColumn == null) {
            // Allocate new column
            newColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(keyString));
            keyValueToColumn.put(keyString, newColumn);
            newColumns.add(newColumn);
            // The first key ever allocated becomes the record key.
            // TODO: make customizable?
            if (recordKey == null) {
                recordKey = keyString;
            }
        }
        /*
         * NOTE: If we have additional columns, we currently merge all rows that
         * have identical values in those columns and then add our new columns.
         */
        if (unchangedColumns.size() > 0) {
            // Build the grouping key: the unchanged cells' string values joined by NUL characters
            // (null values contribute an empty string).
            StringBuffer sb = new StringBuffer();
            for (int c = 0; c < unchangedColumns.size(); c++) {
                Column unchangedColumn = unchangedColumns.get(c);
                Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
                if (c > 0) {
                    sb.append('\0');
                }
                if (cellValue != null) {
                    sb.append(cellValue.toString());
                }
            }
            String unchangedCellValues = sb.toString();
            reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            // Start a new output row when this group has none yet, or when the existing one already
            // holds a non-null value at the value column's cell index.
            // NOTE(review): this tests valueColumn's index on the output row rather than newColumn's;
            // confirm that is the intended "slot already taken" check.
            if (reusableRow == null || reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
                reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }
        }
        // Place the original value cell under the column allocated for this key.
        Cell cell = oldRow.getCell(valueColumn.getCellIndex());
        if (unchangedColumns.size() == 0) {
            int index = newColumn.getCellIndex();
            // getAvailableRow is defined elsewhere; presumably it picks (or adds) a row of the
            // current record whose slot at index is still free, so repeated keys do not overwrite - verify.
            Row row = getAvailableRow(currentRows, newRows, index);
            row.setCell(index, cell);
        } else {
            // TODO: support repeating keys in this mode too
            reusableRow.setCell(newColumn.getCellIndex(), cell);
        }
        if (noteColumn != null) {
            Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
            if (ExpressionUtils.isNonBlankData(noteValue)) {
                Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                if (newNoteColumn == null) {
                    // Allocate new column
                    newNoteColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(noteColumn.getName() + " : " + keyString));
                    keyValueToNoteColumn.put(keyString, newNoteColumn);
                    newNoteColumns.add(newNoteColumn);
                }
                int newNoteCellIndex = newNoteColumn.getCellIndex();
                // NOTE(review): notes are always read from and written to reusableRow, while in the
                // no-unchanged-columns mode the value above goes to the row returned by
                // getAvailableRow; confirm whether those can differ.
                Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                    // A note already exists for this key on this row: append the new one after ";".
                    Cell concatenatedNoteCell = new Cell(existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                    reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                } else {
                    reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                }
            }
        }
    }
    // Final column order: unchanged columns, then key columns, then note columns.
    List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
    allColumns.addAll(newColumns);
    allColumns.addAll(newNoteColumns);
    // clean up the empty rows
    // (iterating backwards so that removals do not shift the indices still to be visited)
    for (int i = newRows.size() - 1; i >= 0; i--) {
        if (newRows.get(i).isEmpty())
            newRows.remove(i);
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(allColumns, newRows));
}
Also used : HashMap(java.util.HashMap) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 9 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

the class ReconClearSimilarCellsOperation method createRowVisitor.

/**
 * Creates the visitor that clears recon data from cells in the column named
 * {@code _columnName} whose value, rendered as a string, equals {@code _similarValue}.
 *
 * <p>For each matching cell that carries recon data, a {@code CellChange} is recorded that
 * replaces it with a cell holding the same value and no recon. If the column does not exist,
 * the visitor records nothing.
 *
 * @param project        project whose column model is used to resolve the column
 * @param cellChanges    output list that receives the recorded changes
 * @param historyEntryID not used by this implementation
 * @return a visitor whose {@code visit} always returns {@code false}
 * @throws Exception as declared by the signature
 */
@Override
protected RowVisitor createRowVisitor(final Project project, final List<CellChange> cellChanges, final long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    // -1 marks "column not found"; the visitor then skips every row.
    final int cellIndex = column != null ? column.getCellIndex() : -1;
    return new RowVisitor() {

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = cellIndex < 0 ? null : row.getCell(cellIndex);
            // Skip cells without recon data. Also skip cells whose value is null: they cannot
            // match the similar value, and calling toString() on them would throw.
            if (cell == null || cell.recon == null || cell.value == null) {
                return false;
            }

            // toString() on a String returns the string itself, so no special case is needed.
            if (_similarValue.equals(cell.value.toString())) {
                Cell newCell = new Cell(cell.value, null);
                cellChanges.add(new CellChange(rowIndex, cellIndex, cell, newCell));
            }
            return false;
        }
    };
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 10 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

the class ReconCopyAcrossColumnsOperation method createHistoryEntry.

/**
 * Builds the history entry that copies recon data from the source column to cells with equal
 * values in the target columns.
 *
 * <p>Two passes are made over the rows returned by {@code engine.getAllFilteredRows()}:
 * <ol>
 *   <li>collect a value-to-recon map from source-column cells whose recon judgment is one of
 *       the configured judgments (a later row overwrites an earlier one for the same value);</li>
 *   <li>for every target-column cell whose value is in that map, record a {@code CellChange}
 *       replacing it with a cell that has the same value and the copied recon. Cells that
 *       already have a judgment other than {@code Judgment.None} are skipped unless
 *       {@code _applyToJudgedCells} is set.</li>
 * </ol>
 *
 * <p>Target column names that do not resolve to a column are ignored. If the source column is
 * missing or no target column resolves, the entry contains no changes.
 *
 * @param project        project whose columns and rows are read
 * @param historyEntryID identifier passed through to the {@code HistoryEntry}
 * @return a history entry wrapping a {@code MassChange} over the collected cell changes
 * @throws Exception as declared by the signature
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }
    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }
    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && toColumns.size() > 0) {
        // Loop-invariant: resolve the source cell index once rather than on every visited row.
        final int fromCellIndex = fromColumn.getCellIndex();
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: remember the recon attached to each eligible source value.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromCellIndex);
                    if (cell != null && cell.value != null && cell.recon != null) {
                        if (judgments.contains(cell.recon.judgment)) {
                            cellValueToRecon.put(cell.value, cell.recon);
                        }
                    }
                    return false;
                }
            });
            // Pass 2: apply the remembered recons to matching cells in the target columns.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // Best effort, as before: a failure during either pass is reported and whatever
            // changes were collected so far are still returned.
            // NOTE(review): consider routing this through the project's logger instead.
            e.printStackTrace();
        }
    }
    // Join the target names with an explicit separator; the single-argument join concatenates
    // them with nothing in between, which made the description unreadable.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Aggregations

Cell (com.google.refine.model.Cell): 58, Row (com.google.refine.model.Row): 36, Column (com.google.refine.model.Column): 19, Test (org.testng.annotations.Test): 16, RefineTest (com.google.refine.tests.RefineTest): 15, BeforeTest (org.testng.annotations.BeforeTest): 15, JSONObject (org.json.JSONObject): 13, ArrayList (java.util.ArrayList): 12, Project (com.google.refine.model.Project): 11, IOException (java.io.IOException): 11, Properties (java.util.Properties): 11, JSONException (org.json.JSONException): 9, RowVisitor (com.google.refine.browsing.RowVisitor): 7, HistoryEntry (com.google.refine.history.HistoryEntry): 7, Serializable (java.io.Serializable): 7, Recon (com.google.refine.model.Recon): 6, CellChange (com.google.refine.model.changes.CellChange): 6, HashMap (java.util.HashMap): 6, Evaluable (com.google.refine.expr.Evaluable): 5, JSONArray (org.json.JSONArray): 4