Search in sources :

Example 6 with RowVisitor

use of com.google.refine.browsing.RowVisitor in project OpenRefine by OpenRefine.

the class ColumnAdditionOperation method createRowVisitor.

/**
 * Builds a row visitor that evaluates this operation's expression for every visited row and
 * appends the resulting cell, if any, to {@code cellsAtRows}.
 *
 * @param project the project whose column model is used to resolve the base column
 * @param cellsAtRows output list; receives one {@code CellAtRow} per row that produced a cell
 * @return a visitor whose {@code visit} always returns {@code false}
 * @throws Exception if no column named {@code _baseColumnName} exists, or if
 *         {@code MetaParser.parse} rejects the expression
 */
protected RowVisitor createRowVisitor(Project project, final List<CellAtRow> cellsAtRows) throws Exception {
    Column column = project.columnModel.getColumnByName(_baseColumnName);
    if (column == null) {
        // Fail with a descriptive message instead of a bare NullPointerException below.
        throw new Exception("No column named " + _baseColumnName);
    }
    final int cellIndex = column.getCellIndex();
    final Evaluable eval = MetaParser.parse(_expression);
    final Properties bindings = ExpressionUtils.createBindings(project);

    return new RowVisitor() {

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(cellIndex);
            Cell newCell = null;

            // The same bindings object is re-bound for each row before evaluation.
            ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell);
            Object o = eval.evaluate(bindings);

            if (o != null) {
                if (o instanceof Cell) {
                    newCell = (Cell) o;
                } else if (o instanceof WrappedCell) {
                    newCell = ((WrappedCell) o).cell;
                } else {
                    Serializable v = ExpressionUtils.wrapStorable(o);
                    if (ExpressionUtils.isError(v)) {
                        if (_onError == OnError.SetToBlank) {
                            // Record nothing for this row.
                            return false;
                        } else if (_onError == OnError.KeepOriginal) {
                            v = cell != null ? cell.value : null;
                        }
                        // Any other policy falls through and stores the error value itself.
                    }
                    if (v != null) {
                        newCell = new Cell(v, null);
                    }
                }
            }

            if (newCell != null) {
                cellsAtRows.add(new CellAtRow(rowIndex, newCell));
            }
            return false;
        }
    };
}
Also used : Serializable(java.io.Serializable) Properties(java.util.Properties) Evaluable(com.google.refine.expr.Evaluable) Project(com.google.refine.model.Project) CellAtRow(com.google.refine.model.changes.CellAtRow) WrappedCell(com.google.refine.expr.WrappedCell) Column(com.google.refine.model.Column) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) CellAtRow(com.google.refine.model.changes.CellAtRow) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) WrappedCell(com.google.refine.expr.WrappedCell)

Example 7 with RowVisitor

use of com.google.refine.browsing.RowVisitor in project OpenRefine by OpenRefine.

the class FillDownOperation method createRowVisitor.

/**
 * Builds a row visitor that fills blank cells of the target column with the most recently
 * visited non-blank cell of that column.
 *
 * @param project the project whose column model is used to resolve the target column
 * @param cellChanges output list; receives one {@code CellChange} per blank cell to fill
 * @param historyEntryID not used by this implementation
 * @return a visitor whose {@code visit} always returns {@code false}
 * @throws Exception if no column named {@code _columnName} exists
 */
@Override
protected RowVisitor createRowVisitor(Project project, final List<CellChange> cellChanges, long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    if (column == null) {
        // Fail with a descriptive message instead of a bare NullPointerException below.
        throw new Exception("No column named " + _columnName);
    }
    final int cellIndex = column.getCellIndex();

    return new RowVisitor() {

        // Last non-blank cell seen so far; null until the first one is encountered.
        Cell previousCell;

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Object value = row.getCellValue(cellIndex);
            if (ExpressionUtils.isNonBlankData(value)) {
                previousCell = row.getCell(cellIndex);
            } else if (previousCell != null) {
                // Blank cell after a non-blank one: record replacing it with the remembered cell.
                CellChange cellChange = new CellChange(rowIndex, cellIndex, row.getCell(cellIndex), previousCell);
                cellChanges.add(cellChange);
            }
            return false;
        }
    };
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 8 with RowVisitor

use of com.google.refine.browsing.RowVisitor in project OpenRefine by OpenRefine.

the class ColumnSplitOperation method createHistoryEntry.

/**
 * Visits all filtered rows, splits the value of the configured column in each, and wraps the
 * collected results in a {@code ColumnSplitChange} history entry.
 *
 * <p>The split strategy is chosen in this order: fixed field lengths when {@code _mode} is
 * {@code "lengths"}, a regular expression when {@code _regex} is set, otherwise a literal
 * separator.
 *
 * @param project the project being operated on
 * @param historyEntryID identifier passed through to the new history entry
 * @return the history entry describing the split
 * @throws Exception if no column named {@code _columnName} exists
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);

    Column sourceColumn = project.columnModel.getColumnByName(_columnName);
    if (sourceColumn == null) {
        throw new Exception("No column named " + _columnName);
    }

    // Output collections passed to the visitor and then to the change.
    List<String> newColumnNames = new ArrayList<String>();
    List<Integer> touchedRowIndices = new ArrayList<Integer>(project.rows.size());
    List<List<Serializable>> splitTuples = new ArrayList<List<Serializable>>(project.rows.size());

    FilteredRows rowsToVisit = engine.getAllFilteredRows();

    RowVisitor splittingVisitor;
    if ("lengths".equals(_mode)) {
        splittingVisitor = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRowIndices, splitTuples) {

            @Override
            protected List<Serializable> split(String text) {
                List<Serializable> pieces = new ArrayList<Serializable>(_fieldLengths.length + 1);
                int cursor = 0;
                for (int fieldLength : _fieldLengths) {
                    // Clamp to the end of the string so short inputs yield empty trailing fields.
                    int end = Math.min(cursor + fieldLength, text.length());
                    pieces.add(stringToValue(text.substring(cursor, end)));
                    cursor = end;
                }
                return pieces;
            }
        };
    } else if (_regex) {
        final Pattern compiledSeparator = Pattern.compile(_separator);
        splittingVisitor = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRowIndices, splitTuples) {

            @Override
            protected List<Serializable> split(String text) {
                String[] parts = compiledSeparator.split(text, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    } else {
        splittingVisitor = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRowIndices, splitTuples) {

            @Override
            protected List<Serializable> split(String text) {
                String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(text, _separator, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    }

    rowsToVisit.accept(project, splittingVisitor);

    String modeSuffix;
    if ("separator".equals(_mode)) {
        modeSuffix = " by separator";
    } else {
        modeSuffix = " by field lengths";
    }
    String description = "Split " + touchedRowIndices.size() + " cell(s) in column " + _columnName
            + " into several columns" + modeSuffix;

    Change splitChange = new ColumnSplitChange(_columnName, newColumnNames, touchedRowIndices, splitTuples, _removeOriginalColumn);
    return new HistoryEntry(historyEntryID, project, description, this, splitChange);
}
Also used : Pattern(java.util.regex.Pattern) Serializable(java.io.Serializable) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) ArrayList(java.util.ArrayList) Change(com.google.refine.history.Change) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) ArrayList(java.util.ArrayList) List(java.util.List) RowVisitor(com.google.refine.browsing.RowVisitor) Engine(com.google.refine.browsing.Engine)

Example 9 with RowVisitor

use of com.google.refine.browsing.RowVisitor in project OpenRefine by OpenRefine.

the class ReconClearSimilarCellsOperation method createRowVisitor.

/**
 * Builds a row visitor that records a change clearing the recon of every reconciled cell in
 * the target column whose value, as a string, equals {@code _similarValue}.
 *
 * <p>If the column does not exist, the returned visitor records nothing.
 *
 * @param project the project whose column model is used to resolve the target column
 * @param cellChanges output list; receives one {@code CellChange} per matching cell
 * @param historyEntryID not used by this implementation
 * @return a visitor whose {@code visit} always returns {@code false}
 */
@Override
protected RowVisitor createRowVisitor(final Project project, final List<CellChange> cellChanges, final long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    final int cellIndex = column != null ? column.getCellIndex() : -1;
    return new RowVisitor() {

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            if (cellIndex < 0) {
                // Column was not found; there is nothing to inspect.
                return false;
            }
            Cell cell = row.getCell(cellIndex);
            // A reconciled cell with a null value cannot match; skipping it avoids a
            // NullPointerException on cell.value.toString().
            if (cell != null && cell.recon != null && cell.value != null) {
                String value = cell.value.toString();
                if (_similarValue.equals(value)) {
                    // Same value, recon dropped.
                    Cell newCell = new Cell(cell.value, null);
                    cellChanges.add(new CellChange(rowIndex, cellIndex, cell, newCell));
                }
            }
            return false;
        }
    };
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 10 with RowVisitor

use of com.google.refine.browsing.RowVisitor in project OpenRefine by OpenRefine.

the class ReconCopyAcrossColumnsOperation method createHistoryEntry.

/**
 * Copies recon information from the source column to cells with equal values in the target
 * columns, and wraps the resulting cell changes in a {@code MassChange} history entry.
 *
 * <p>Two passes are made over the filtered rows: the first maps each source-column value to
 * its recon (only for recons whose judgment is in the configured set), the second records a
 * change for every target-column cell whose value appears in that map. Target cells that
 * already carry a judgment other than {@code Judgment.None} are only changed when
 * {@code _applyToJudgedCells} is set.
 *
 * <p>Target column names that do not resolve to a column are ignored; if the source column is
 * missing or no target column resolves, the entry contains no changes.
 *
 * @param project the project being operated on
 * @param historyEntryID identifier passed through to the new history entry
 * @return the history entry describing the copy
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }
    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }
    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && !toColumns.isEmpty()) {
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        // Resolved once instead of on every visited row.
        final int fromCellIndex = fromColumn.getCellIndex();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: collect value -> recon from the source column.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromCellIndex);
                    if (cell != null && cell.value != null && cell.recon != null) {
                        if (judgments.contains(cell.recon.judgment)) {
                            // A later row with the same value overwrites an earlier one.
                            cellValueToRecon.put(cell.value, cell.recon);
                        }
                    }
                    return false;
                }
            });
            // Pass 2: record a change for each matching target cell.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // NOTE(review): a failure here is only printed, and the entry is still built from
            // whatever changes were collected so far. Kept as-is to preserve the existing
            // best-effort behavior; confirm whether it should propagate instead.
            e.printStackTrace();
        }
    }
    // Join with an explicit separator; the single-argument join concatenates the names with
    // nothing between them, which makes the description unreadable for several columns.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Aggregations

RowVisitor (com.google.refine.browsing.RowVisitor)12 Column (com.google.refine.model.Column)9 Project (com.google.refine.model.Project)9 Row (com.google.refine.model.Row)8 JSONObject (org.json.JSONObject)8 Cell (com.google.refine.model.Cell)7 CellChange (com.google.refine.model.changes.CellChange)6 FilteredRows (com.google.refine.browsing.FilteredRows)5 JSONException (org.json.JSONException)5 Engine (com.google.refine.browsing.Engine)4 Serializable (java.io.Serializable)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Properties (java.util.Properties)4 RecordVisitor (com.google.refine.browsing.RecordVisitor)3 Evaluable (com.google.refine.expr.Evaluable)3 HistoryEntry (com.google.refine.history.HistoryEntry)3 SortingRecordVisitor (com.google.refine.sorting.SortingRecordVisitor)3 SortingRowVisitor (com.google.refine.sorting.SortingRowVisitor)3 FilteredRecords (com.google.refine.browsing.FilteredRecords)2