Search in sources :

Example 11 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

The method createRowVisitor of the class FillDownOperation.

/**
 * Creates the row visitor that records fill-down changes for the column named
 * {@code _columnName}.
 *
 * <p>For each visited row, a non-blank cell in the target column is remembered; a blank
 * cell that follows a remembered one produces a {@link CellChange} replacing it with the
 * remembered cell. Blank cells seen before any non-blank cell are left untouched.
 *
 * @param project        the project whose column model is used to resolve the column
 * @param cellChanges    the list that receives one {@link CellChange} per filled cell
 * @param historyEntryID the history entry id (not used by this visitor)
 * @return a visitor that appends its changes to {@code cellChanges}
 * @throws Exception if no column named {@code _columnName} exists in the project
 */
@Override
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID) throws Exception {
    Column column = project.columnModel.getColumnByName(_columnName);
    if (column == null) {
        // Fail with a descriptive error instead of a bare NullPointerException below.
        throw new Exception("No column named " + _columnName);
    }
    return new RowVisitor() {

        int cellIndex;

        List<CellChange> cellChanges;

        // Most recent non-blank cell seen in the target column; null until one is found.
        Cell previousCell;

        // Anonymous classes cannot declare constructors, so state is injected through init().
        public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
            this.cellIndex = cellIndex;
            this.cellChanges = cellChanges;
            return this;
        }

        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Object value = row.getCellValue(cellIndex);
            if (ExpressionUtils.isNonBlankData(value)) {
                previousCell = row.getCell(cellIndex);
            } else if (previousCell != null) {
                // Blank cell below a known value: schedule it to be overwritten.
                CellChange cellChange = new CellChange(rowIndex, cellIndex, row.getCell(cellIndex), previousCell);
                cellChanges.add(cellChange);
            }
            return false;
        }
    }.init(column.getCellIndex(), cellChanges);
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 12 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class KeyValueColumnizeOperation.

/**
 * Builds the history entry that turns a key column / value column pair (plus an optional
 * note column) into one new column per distinct key.
 *
 * <p>Every column other than the key, value and note columns is kept as an "unchanged"
 * column. When such columns exist, rows are grouped by the concatenation of their
 * unchanged cell values; otherwise (the degenerate two-column case) a new output row is
 * started on a blank key or when the first key seen repeats.
 *
 * @param project        the project whose rows and column model are read
 * @param historyEntryID the id to give the resulting history entry
 * @return a history entry wrapping a {@link MassRowColumnChange} with the new columns and rows
 * @throws Exception if the key column or the value column does not exist in the project
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
    int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
    int noteColumnIndex = _noteColumnName == null ? -1 : project.columnModel.getColumnIndexByName(_noteColumnName);
    Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
    Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
    Column noteColumn = _noteColumnName == null ? null : project.columnModel.getColumnByName(_noteColumnName);
    // Fail fast with a descriptive message; previously a missing column surfaced as a
    // NullPointerException in the middle of the row loop.
    if (keyColumn == null) {
        throw new Exception("No key column named " + _keyColumnName);
    }
    if (valueColumn == null) {
        throw new Exception("No value column named " + _valueColumnName);
    }
    // Loop-invariant cell indexes, looked up once.
    int keyCellIndex = keyColumn.getCellIndex();
    int valueCellIndex = valueColumn.getCellIndex();
    List<Column> unchangedColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    for (int i = 0; i < oldColumns.size(); i++) {
        if (i != keyColumnIndex && i != valueColumnIndex && i != noteColumnIndex) {
            unchangedColumns.add(oldColumns.get(i));
        }
    }
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> newNoteColumns = new ArrayList<Column>();
    Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
    Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
    Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
    List<Row> newRows = new ArrayList<Row>();
    List<Row> oldRows = project.rows;
    Row reusableRow = null;
    List<Row> currentRows = new ArrayList<Row>();
    // key which indicates the start of a record
    String recordKey = null;
    if (unchangedColumns.isEmpty()) {
        reusableRow = new Row(1);
        newRows.add(reusableRow);
        currentRows.clear();
        currentRows.add(reusableRow);
    }
    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Object key = oldRow.getCellValue(keyCellIndex);
        if (!ExpressionUtils.isNonBlankData(key)) {
            if (unchangedColumns.isEmpty()) {
                // For degenerate 2 column case (plus optional note column),
                // start a new row when we hit a blank line
                reusableRow = new Row(newColumns.size());
                newRows.add(reusableRow);
                currentRows.clear();
                currentRows.add(reusableRow);
            } else {
                // Copy rows with no key
                newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
            }
            continue;
        }
        String keyString = key.toString();
        // TODO: Add support for processing in record mode instead of just by rows
        // Start a fresh output row on the very first key and whenever the first key repeats.
        if (keyString.equals(recordKey) || recordKey == null) {
            reusableRow = new Row(newColumns.size());
            newRows.add(reusableRow);
            currentRows.clear();
            currentRows.add(reusableRow);
        }
        Column newColumn = keyValueToColumn.get(keyString);
        if (newColumn == null) {
            // Allocate new column
            newColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(keyString));
            keyValueToColumn.put(keyString, newColumn);
            newColumns.add(newColumn);
            // TODO: make customizable?
            if (recordKey == null) {
                recordKey = keyString;
            }
        }
        /*
         * NOTE: If we have additional columns, we currently merge all rows that
         * have identical values in those columns and then add our new columns.
         */
        if (!unchangedColumns.isEmpty()) {
            // Grouping key: unchanged cell values joined by NUL; null cells contribute nothing.
            StringBuilder sb = new StringBuilder();
            for (int c = 0; c < unchangedColumns.size(); c++) {
                Column unchangedColumn = unchangedColumns.get(c);
                Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
                if (c > 0) {
                    sb.append('\0');
                }
                if (cellValue != null) {
                    sb.append(cellValue.toString());
                }
            }
            String unchangedCellValues = sb.toString();
            reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            if (reusableRow == null || reusableRow.getCellValue(valueCellIndex) != null) {
                reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }
        }
        Cell cell = oldRow.getCell(valueCellIndex);
        if (unchangedColumns.isEmpty()) {
            int index = newColumn.getCellIndex();
            Row row = getAvailableRow(currentRows, newRows, index);
            row.setCell(index, cell);
        } else {
            // TODO: support repeating keys in this mode too
            reusableRow.setCell(newColumn.getCellIndex(), cell);
        }
        if (noteColumn != null) {
            Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
            if (ExpressionUtils.isNonBlankData(noteValue)) {
                Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                if (newNoteColumn == null) {
                    // Allocate new column
                    newNoteColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(noteColumn.getName() + " : " + keyString));
                    keyValueToNoteColumn.put(keyString, newNoteColumn);
                    newNoteColumns.add(newNoteColumn);
                }
                int newNoteCellIndex = newNoteColumn.getCellIndex();
                // NOTE(review): notes always go to reusableRow, while in the two-column case the
                // value above goes to the row chosen by getAvailableRow - confirm these always
                // refer to the same row.
                Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                    // Several notes for the same key in one row are concatenated with ';'.
                    Cell concatenatedNoteCell = new Cell(existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                    reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                } else {
                    reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                }
            }
        }
    }
    List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
    allColumns.addAll(newColumns);
    allColumns.addAll(newNoteColumns);
    // clean up the empty rows (iterate backwards so removals do not shift pending indexes)
    for (int i = newRows.size() - 1; i >= 0; i--) {
        if (newRows.get(i).isEmpty()) {
            newRows.remove(i);
        }
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(allColumns, newRows));
}
Also used : HashMap(java.util.HashMap) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 13 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

The method createRowVisitor of the class ReconClearSimilarCellsOperation.

/**
 * Creates a visitor that strips the recon from every cell of the column named
 * {@code _columnName} whose value, rendered as a string, equals {@code _similarValue}.
 *
 * <p>If the column cannot be found, the returned visitor records no changes.
 *
 * @param project        the project used to resolve the column
 * @param cellChanges    the list that receives one {@link CellChange} per cleared cell
 * @param historyEntryID the history entry id (not used by this visitor)
 * @return a visitor that appends its changes to {@code cellChanges}
 */
@Override
protected RowVisitor createRowVisitor(final Project project, final List<CellChange> cellChanges, final long historyEntryID) throws Exception {
    final Column targetColumn = project.columnModel.getColumnByName(_columnName);
    final int cellIndex;
    if (targetColumn == null) {
        // Sentinel: no such column, so the visitor below skips every row.
        cellIndex = -1;
    } else {
        cellIndex = targetColumn.getCellIndex();
    }
    return new RowVisitor() {

        @Override
        public void start(Project project) {
            // no setup required
        }

        @Override
        public void end(Project project) {
            // no teardown required
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            if (cellIndex < 0) {
                return false;
            }
            Cell current = row.getCell(cellIndex);
            if (current == null || current.recon == null) {
                // Only cells that carry a recon are candidates.
                return false;
            }
            // For a String value toString() returns the value itself.
            String text = current.value.toString();
            if (_similarValue.equals(text)) {
                // Same value, recon dropped.
                Cell cleared = new Cell(current.value, null);
                cellChanges.add(new CellChange(rowIndex, cellIndex, current, cleared));
            }
            return false;
        }
    };
}
Also used : Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) CellChange(com.google.refine.model.changes.CellChange) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell)

Example 14 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class ReconCopyAcrossColumnsOperation.

/**
 * Builds the history entry that copies recon judgments from the column named
 * {@code _fromColumnName} to the columns listed in {@code _toColumnNames}.
 *
 * <p>Two passes are made over the filtered rows: the first maps each source cell value to
 * its recon (only for recons whose judgment is in {@code _judgments}); the second assigns
 * that recon to target cells with the same value. Target cells that already have a
 * judgment other than {@code Judgment.None} are only overwritten when
 * {@code _applyToJudgedCells} is set. Target column names that do not resolve are skipped,
 * and nothing is copied if the source column is missing.
 *
 * @param project        the project whose rows are scanned
 * @param historyEntryID the id to give the resulting history entry
 * @return a history entry wrapping a {@link MassChange} of the collected cell changes
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }
    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }
    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && !toColumns.isEmpty()) {
        // Loop-invariant: resolve the source cell index once rather than per visited row.
        final int fromCellIndex = fromColumn.getCellIndex();
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: collect value -> recon for source cells with an accepted judgment.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromCellIndex);
                    if (cell != null && cell.value != null && cell.recon != null) {
                        if (judgments.contains(cell.recon.judgment)) {
                            // A later row with the same value overwrites an earlier one.
                            cellValueToRecon.put(cell.value, cell.recon);
                        }
                    }
                    return false;
                }
            });
            // Pass 2: apply the collected recons to matching cells in the target columns.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // Best effort (kept from the original): a failure while scanning is reported on
            // stderr and the entry is built from whatever changes were collected so far.
            e.printStackTrace();
        }
    }
    // Join the target names with a separator; the single-argument join runs them together.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Example 15 with Cell

use of com.google.refine.model.Cell in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class DenormalizeOperation.

/**
 * Builds the history entry for the denormalize operation.
 *
 * <p>Each row that has cell dependencies in the project's record model is duplicated, and
 * for every dependency the referenced cell (same cell index, taken from the referenced
 * row) is copied into the duplicate. Rows without cell dependencies are reused as they are.
 *
 * @param project        the project whose rows and record model are read
 * @param historyEntryID the id to give the resulting history entry
 * @return a history entry wrapping a {@link MassRowChange} with the resulting rows
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    List<Row> sourceRows = project.rows;
    List<Row> denormalizedRows = new ArrayList<Row>();
    for (int rowIndex = 0; rowIndex < sourceRows.size(); rowIndex++) {
        Row original = sourceRows.get(rowIndex);
        RowDependency dependency = project.recordModel.getRowDependency(rowIndex);
        if (dependency.cellDependencies == null) {
            // No dependencies: keep the existing row object.
            denormalizedRows.add(original);
            continue;
        }
        Row copy = original.dup();
        for (CellDependency cellDependency : dependency.cellDependencies) {
            if (cellDependency == null) {
                continue;
            }
            int sourceRowIndex = cellDependency.rowIndex;
            if (sourceRowIndex < 0 || sourceRowIndex >= sourceRows.size()) {
                // Dependency points outside the row list; nothing to copy.
                continue;
            }
            int sourceCellIndex = cellDependency.cellIndex;
            Cell sourceCell = sourceRows.get(sourceRowIndex).getCell(sourceCellIndex);
            copy.setCell(sourceCellIndex, sourceCell);
        }
        denormalizedRows.add(copy);
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(project), DenormalizeOperation.this, new MassRowChange(denormalizedRows));
}
Also used : MassRowChange(com.google.refine.model.changes.MassRowChange) CellDependency(com.google.refine.model.RecordModel.CellDependency) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) Row(com.google.refine.model.Row) RowDependency(com.google.refine.model.RecordModel.RowDependency) Cell(com.google.refine.model.Cell)

Aggregations

Cell (com.google.refine.model.Cell)58 Row (com.google.refine.model.Row)36 Column (com.google.refine.model.Column)19 Test (org.testng.annotations.Test)16 RefineTest (com.google.refine.tests.RefineTest)15 BeforeTest (org.testng.annotations.BeforeTest)15 JSONObject (org.json.JSONObject)13 ArrayList (java.util.ArrayList)12 Project (com.google.refine.model.Project)11 IOException (java.io.IOException)11 Properties (java.util.Properties)11 JSONException (org.json.JSONException)9 RowVisitor (com.google.refine.browsing.RowVisitor)7 HistoryEntry (com.google.refine.history.HistoryEntry)7 Serializable (java.io.Serializable)7 Recon (com.google.refine.model.Recon)6 CellChange (com.google.refine.model.changes.CellChange)6 HashMap (java.util.HashMap)6 Evaluable (com.google.refine.expr.Evaluable)5 JSONArray (org.json.JSONArray)4