Search in sources:

Example 6 with HistoryEntry

use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class KeyValueColumnizeOperation.

/**
 * Builds the history entry for the key/value columnize operation.
 *
 * Every distinct non-blank value found in the key column becomes a new column; the cell from the
 * value column of the same row is placed under it. When a note column is configured, its non-blank
 * values go into additional "note : key" columns. All other ("unchanged") columns are carried over.
 * The result is wrapped in a {@code MassRowColumnChange} holding the new column list and new rows.
 *
 * @param project        project whose column model and rows are read
 * @param historyEntryID id given to the returned history entry
 * @return a history entry carrying the column/row replacement
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    // Positions (in the column list) of the columns consumed by this operation; -1 when no note column is configured.
    int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
    int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
    int noteColumnIndex = _noteColumnName == null ? -1 : project.columnModel.getColumnIndexByName(_noteColumnName);
    // NOTE(review): keyColumn and valueColumn are dereferenced below without a null check;
    // presumably getColumnByName returns null for an unknown name, in which case this would
    // fail with a NullPointerException on the first row — confirm.
    Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
    Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
    Column noteColumn = _noteColumnName == null ? null : project.columnModel.getColumnByName(_noteColumnName);
    // Every column that is neither key, value nor note is kept as-is.
    List<Column> unchangedColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    for (int i = 0; i < oldColumns.size(); i++) {
        if (i != keyColumnIndex && i != valueColumnIndex && i != noteColumnIndex) {
            unchangedColumns.add(oldColumns.get(i));
        }
    }
    // Columns created from key values, and their optional note companions, in order of first appearance.
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> newNoteColumns = new ArrayList<Column>();
    // Key string -> column created for it (value column and note column respectively).
    Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
    Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
    // Concatenated unchanged-cell values -> the output row currently collecting that group.
    Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
    List<Row> newRows = new ArrayList<Row>();
    List<Row> oldRows = project.rows;
    Row reusableRow = null;
    // Rows handed to getAvailableRow in the mode without unchanged columns.
    List<Row> currentRows = new ArrayList<Row>();
    // key which indicates the start of a record
    String recordKey = null;
    if (unchangedColumns.isEmpty()) {
        // No other columns: start with one output row to fill.
        reusableRow = new Row(1);
        newRows.add(reusableRow);
        currentRows.clear();
        currentRows.add(reusableRow);
    }
    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Object key = oldRow.getCellValue(keyColumn.getCellIndex());
        if (!ExpressionUtils.isNonBlankData(key)) {
            if (unchangedColumns.isEmpty()) {
                // For degenerate 2 column case (plus optional note column),
                // start a new row when we hit a blank line
                reusableRow = new Row(newColumns.size());
                newRows.add(reusableRow);
                currentRows.clear();
                currentRows.add(reusableRow);
            } else {
                // Copy rows with no key
                newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
            }
            continue;
        }
        String keyString = key.toString();
        // TODO: Add support for processing in record mode instead of just by rows
        // A new output row is started on the very first key and whenever the first-seen key repeats.
        // NOTE(review): this also runs when unchanged columns exist; the row added here is not
        // written to in that mode (reusableRow is reassigned below), so it appears to rely on
        // the empty-row cleanup at the end — confirm.
        if (keyString.equals(recordKey) || recordKey == null) {
            reusableRow = new Row(newColumns.size());
            newRows.add(reusableRow);
            currentRows.clear();
            currentRows.add(reusableRow);
        }
        Column newColumn = keyValueToColumn.get(keyString);
        if (newColumn == null) {
            // Allocate new column
            newColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(keyString));
            keyValueToColumn.put(keyString, newColumn);
            newColumns.add(newColumn);
            // TODO: make customizable?
            // The first key ever seen becomes the record key.
            if (recordKey == null) {
                recordKey = keyString;
            }
        }
        /*
         * NOTE: If we have additional columns, we currently merge all rows that
         * have identical values in those columns and then add our new columns.
         */
        if (unchangedColumns.size() > 0) {
            // Build the grouping key: unchanged cell values joined by NUL; null cells contribute an empty string.
            StringBuffer sb = new StringBuffer();
            for (int c = 0; c < unchangedColumns.size(); c++) {
                Column unchangedColumn = unchangedColumns.get(c);
                Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
                if (c > 0) {
                    sb.append('\0');
                }
                if (cellValue != null) {
                    sb.append(cellValue.toString());
                }
            }
            String unchangedCellValues = sb.toString();
            reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            // NOTE(review): the "already filled" test reads the old value column's cell index on
            // the merged row rather than newColumn's cell index — confirm this is intended.
            if (reusableRow == null || reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
                reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }
        }
        // Move the value cell (the Cell object itself, not only its value) under the key's column.
        Cell cell = oldRow.getCell(valueColumn.getCellIndex());
        if (unchangedColumns.size() == 0) {
            int index = newColumn.getCellIndex();
            // getAvailableRow is defined outside this block; it is given the current rows, all new rows and the target cell index.
            Row row = getAvailableRow(currentRows, newRows, index);
            row.setCell(index, cell);
        } else {
            // TODO: support repeating keys in this mode too
            reusableRow.setCell(newColumn.getCellIndex(), cell);
        }
        if (noteColumn != null) {
            Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
            if (ExpressionUtils.isNonBlankData(noteValue)) {
                Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                if (newNoteColumn == null) {
                    // Allocate new column
                    newNoteColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(noteColumn.getName() + " : " + keyString));
                    keyValueToNoteColumn.put(keyString, newNoteColumn);
                    newNoteColumns.add(newNoteColumn);
                }
                int newNoteCellIndex = newNoteColumn.getCellIndex();
                // NOTE(review): notes are written to reusableRow, while in the mode without unchanged
                // columns the value cell went to the row returned by getAvailableRow — these may
                // differ; confirm.
                Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                    // A note already exists for this key in this row: append with ';' as a plain cell (second Cell argument is null).
                    Cell concatenatedNoteCell = new Cell(existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                    reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                } else {
                    reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                }
            }
        }
    }
    // Final column order: unchanged columns, then key-derived columns, then note columns.
    List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
    allColumns.addAll(newColumns);
    allColumns.addAll(newNoteColumns);
    // clean up the empty rows (iterating backwards so removals do not shift pending indices)
    for (int i = newRows.size() - 1; i >= 0; i--) {
        if (newRows.get(i).isEmpty())
            newRows.remove(i);
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(allColumns, newRows));
}
Also used : HashMap(java.util.HashMap) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 7 with HistoryEntry

use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class ColumnSplitOperation.

/**
 * Builds the history entry that splits the cells of the configured column into several columns.
 *
 * Depending on the operation's settings the split is done by fixed field lengths, by a regular
 * expression, or by a literal separator. The rows selected by the engine are visited once; the
 * visitor collects the new column names, the affected row indices and the split values, which
 * are then packaged into a {@code ColumnSplitChange}.
 *
 * @param project        project whose rows are visited
 * @param historyEntryID id given to the returned history entry
 * @return the history entry describing the split
 * @throws Exception if the configured column does not exist in the project
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);

    Column sourceColumn = project.columnModel.getColumnByName(_columnName);
    if (sourceColumn == null) {
        throw new Exception("No column named " + _columnName);
    }

    // Output collectors filled in by the row visitor.
    List<String> newColumnNames = new ArrayList<String>();
    List<Integer> touchedRows = new ArrayList<Integer>(project.rows.size());
    List<List<Serializable>> splitValues = new ArrayList<List<Serializable>>(project.rows.size());

    FilteredRows selection = engine.getAllFilteredRows();

    RowVisitor splitter;
    if ("lengths".equals(_mode)) {
        // Fixed-width mode: cut consecutive slices, clamping each slice at the end of the string.
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                List<Serializable> pieces = new ArrayList<Serializable>(_fieldLengths.length + 1);
                int start = 0;
                for (int width : _fieldLengths) {
                    int end = Math.min(start + width, s.length());
                    pieces.add(stringToValue(s.substring(start, end)));
                    start = end;
                }
                return pieces;
            }
        };
    } else if (_regex) {
        // Regex mode: compile once up front and let the visitor capture the compiled pattern.
        final Pattern compiledSeparator = Pattern.compile(_separator);
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = compiledSeparator.split(s, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    } else {
        // Literal separator mode.
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, touchedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    }

    selection.accept(project, splitter);

    // Any mode other than "separator" is described as a split by field lengths.
    String howSplit;
    if ("separator".equals(_mode)) {
        howSplit = " by separator";
    } else {
        howSplit = " by field lengths";
    }
    String description = "Split " + touchedRows.size() + " cell(s) in column " + _columnName + " into several columns" + howSplit;

    Change splitChange = new ColumnSplitChange(_columnName, newColumnNames, touchedRows, splitValues, _removeOriginalColumn);
    return new HistoryEntry(historyEntryID, project, description, this, splitChange);
}
Also used : Pattern(java.util.regex.Pattern) Serializable(java.io.Serializable) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) ArrayList(java.util.ArrayList) Change(com.google.refine.history.Change) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) ArrayList(java.util.ArrayList) List(java.util.List) RowVisitor(com.google.refine.browsing.RowVisitor) Engine(com.google.refine.browsing.Engine)

Example 8 with HistoryEntry

use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class ReconCopyAcrossColumnsOperation.

/**
 * Builds the history entry that copies recon data from one column onto cells of other columns
 * holding the same value.
 *
 * Two passes are made over the rows selected by the engine: the first maps each source-column
 * cell value to its recon (only for recons whose judgment is one of the configured judgments),
 * the second creates a {@code CellChange} for every target-column cell whose value has a mapped
 * recon. Target cells that already carry a judgment other than {@code Judgment.None} are only
 * touched when {@code _applyToJudgedCells} is set.
 *
 * @param project        project whose rows are visited
 * @param historyEntryID id given to the returned history entry
 * @return a history entry wrapping a {@code MassChange} of the collected cell changes
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);
    // Target columns that do not exist in the project are skipped.
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }
    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }
    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && toColumns.size() > 0) {
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: remember the recon for each source value; a later row with the same value overwrites an earlier one.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromColumn.getCellIndex());
                    if (cell != null && cell.value != null && cell.recon != null) {
                        if (judgments.contains(cell.recon.judgment)) {
                            cellValueToRecon.put(cell.value, cell.recon);
                        }
                    }
                    return false;
                }
            });
            // Pass 2: record a change for every matching target cell.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // Best-effort as in the original: the failure is printed and whatever changes were
            // collected so far are still returned.
            e.printStackTrace();
        }
    }
    // Join the target column names with ", "; the single-argument StringUtils.join adds no
    // separator and would run the names together.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Example 9 with HistoryEntry

use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class DenormalizeOperation.

/**
 * Builds the history entry for the denormalize operation.
 *
 * For each row that has cell dependencies, a duplicate of the row is made and every in-range
 * dependency's context cell is copied into the duplicate at the dependency's cell index. Rows
 * without cell dependencies are passed through unchanged. The resulting row list is wrapped in
 * a {@code MassRowChange}.
 *
 * @param project        project whose rows and record model are read
 * @param historyEntryID id given to the returned history entry
 * @return the history entry carrying the replacement rows
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    List<Row> sourceRows = project.rows;
    List<Row> resultRows = new ArrayList<Row>();

    for (int rowIndex = 0; rowIndex < sourceRows.size(); rowIndex++) {
        Row sourceRow = sourceRows.get(rowIndex);
        RowDependency dependency = project.recordModel.getRowDependency(rowIndex);

        if (dependency.cellDependencies == null) {
            // Nothing to fill in for this row.
            resultRows.add(sourceRow);
            continue;
        }

        Row filledRow = sourceRow.dup();
        for (CellDependency cellDependency : dependency.cellDependencies) {
            if (cellDependency == null) {
                continue;
            }
            int fromRow = cellDependency.rowIndex;
            int atCell = cellDependency.cellIndex;
            // Ignore dependencies pointing outside the row list.
            if (fromRow < 0 || fromRow >= sourceRows.size()) {
                continue;
            }
            filledRow.setCell(atCell, sourceRows.get(fromRow).getCell(atCell));
        }
        resultRows.add(filledRow == null ? sourceRow : filledRow);
    }

    MassRowChange rowChange = new MassRowChange(resultRows);
    return new HistoryEntry(historyEntryID, project, getBriefDescription(project), DenormalizeOperation.this, rowChange);
}
Also used : MassRowChange(com.google.refine.model.changes.MassRowChange) CellDependency(com.google.refine.model.RecordModel.CellDependency) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) Row(com.google.refine.model.Row) RowDependency(com.google.refine.model.RecordModel.RowDependency) Cell(com.google.refine.model.Cell)

Example 10 with HistoryEntry

use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.

The method createHistoryEntry of the class RowFlagOperation.

/**
 * Builds the history entry that flags or unflags the rows selected by the engine.
 *
 * The row visitor obtained from {@code createRowVisitor} fills the change list while the
 * filtered rows are traversed; the number of collected changes is reported in the description.
 *
 * @param project        project whose rows are visited
 * @param historyEntryID id given to the returned history entry
 * @return a history entry wrapping a {@code MassChange} of the collected changes
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    List<Change> flagChanges = new ArrayList<Change>(project.rows.size());

    engine.getAllFilteredRows().accept(project, createRowVisitor(project, flagChanges));

    // "Flag N rows" or "Unflag N rows", depending on the operation's setting.
    StringBuilder description = new StringBuilder();
    if (_flagged) {
        description.append("Flag");
    } else {
        description.append("Unflag");
    }
    description.append(" ").append(flagChanges.size()).append(" rows");

    MassChange massChange = new MassChange(flagChanges, false);
    return new HistoryEntry(historyEntryID, project, description.toString(), this, massChange);
}
Also used : MassChange(com.google.refine.model.changes.MassChange) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) RowFlagChange(com.google.refine.model.changes.RowFlagChange) Change(com.google.refine.history.Change) MassChange(com.google.refine.model.changes.MassChange) FilteredRows(com.google.refine.browsing.FilteredRows) Engine(com.google.refine.browsing.Engine)

Aggregations

HistoryEntry (com.google.refine.history.HistoryEntry)22 ArrayList (java.util.ArrayList)15 Column (com.google.refine.model.Column)10 Engine (com.google.refine.browsing.Engine)8 JSONException (org.json.JSONException)8 FilteredRows (com.google.refine.browsing.FilteredRows)7 Cell (com.google.refine.model.Cell)7 Row (com.google.refine.model.Row)7 Project (com.google.refine.model.Project)6 Properties (java.util.Properties)6 Change (com.google.refine.history.Change)5 JSONWriter (org.json.JSONWriter)5 JSONObject (org.json.JSONObject)4 RowVisitor (com.google.refine.browsing.RowVisitor)3 MassChange (com.google.refine.model.changes.MassChange)3 MassRowChange (com.google.refine.model.changes.MassRowChange)3 MassRowColumnChange (com.google.refine.model.changes.MassRowColumnChange)3 Pool (com.google.refine.util.Pool)3 IOException (java.io.IOException)3 ServletException (javax.servlet.ServletException)3