Search in sources :

Example 1 with MassRowColumnChange

Use of com.google.refine.model.changes.MassRowColumnChange in project OpenRefine by OpenRefine.

In the class TransposeRowsIntoColumnsOperation, method createHistoryEntry:

/**
 * Builds the history entry that folds every group of {@code _rowCount} consecutive rows of
 * the column named {@code _columnName} into {@code _rowCount} new columns.
 *
 * <p>The source column is replaced by new columns named {@code "<_columnName> <n>"}; numbers
 * whose resulting name is already used by an existing column are skipped. Columns before the
 * source column keep their position as cell index, columns after it are shifted right by
 * {@code _rowCount - 1}.
 *
 * <p>Within each group of rows, the non-blank cells of the source column are all written to
 * the first new row of the group. Other non-blank cells are copied to a new row per old row;
 * such a row (other than the first of the group) is only kept when it received at least one
 * cell from a column other than the source column.
 *
 * @param project the project whose columns and rows are read
 * @param historyEntryID the id passed on to the created {@link HistoryEntry}
 * @return a history entry wrapping a {@link MassRowColumnChange} with the new columns and rows
 * @throws Exception if {@code _rowCount} is not positive or the source column cannot be found
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    // A zero row count used to fail with an opaque "/ by zero" when pre-sizing the row list, and a
    // negative one makes the row loop below step backwards; reject both with a clear message.
    if (_rowCount <= 0) {
        throw new Exception("Row count must be positive but was " + _rowCount);
    }
    int columnIndex = project.columnModel.getColumnIndexByName(_columnName);
    // A negative index can never match a column position below, which would silently shift every
    // cell as if all columns came after the source column. (Presumably -1 means "not found";
    // TODO confirm against ColumnModel.)
    if (columnIndex < 0) {
        throw new Exception("No column named " + _columnName);
    }
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    int columnCount = oldColumns.size();
    for (int i = 0; i < columnCount; i++) {
        Column column = oldColumns.get(i);
        if (i == columnIndex) {
            // Expand the source column into _rowCount numbered columns.
            int newIndex = 1;
            for (int n = 0; n < _rowCount; n++) {
                String columnName = _columnName + " " + newIndex++;
                // Skip numbers whose name collides with an existing column.
                while (project.columnModel.getColumnByName(columnName) != null) {
                    columnName = _columnName + " " + newIndex++;
                }
                newColumns.add(new Column(i + n, columnName));
            }
        } else if (i < columnIndex) {
            newColumns.add(new Column(i, column.getName()));
        } else {
            // Columns after the source column move right to make room for the extra columns.
            newColumns.add(new Column(i + _rowCount - 1, column.getName()));
        }
    }
    List<Row> oldRows = project.rows;
    List<Row> newRows = new ArrayList<Row>(oldRows.size() / _rowCount);
    for (int r = 0; r < oldRows.size(); r += _rowCount) {
        // All transposed cells of this group end up in this row.
        Row firstNewRow = new Row(newColumns.size());
        for (int r2 = 0; r2 < _rowCount && r + r2 < oldRows.size(); r2++) {
            Row oldRow = oldRows.get(r + r2);
            Row newRow = r2 == 0 ? firstNewRow : new Row(newColumns.size());
            // The first row of a group is always kept; later rows only if they carry other data.
            boolean hasData = r2 == 0;
            for (int c = 0; c < oldColumns.size(); c++) {
                Column column = oldColumns.get(c);
                // Cells are read by the old cell index but written by column position.
                Cell cell = oldRow.getCell(column.getCellIndex());
                if (cell != null && cell.value != null) {
                    if (c == columnIndex) {
                        firstNewRow.setCell(columnIndex + r2, cell);
                    } else if (c < columnIndex) {
                        newRow.setCell(c, cell);
                        hasData = true;
                    } else {
                        newRow.setCell(c + _rowCount - 1, cell);
                        hasData = true;
                    }
                }
            }
            if (hasData) {
                newRows.add(newRow);
            }
        }
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(newColumns, newRows));
}
Also used : Column(com.google.refine.model.Column) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 2 with MassRowColumnChange

Use of com.google.refine.model.changes.MassRowColumnChange in project OpenRefine by OpenRefine.

In the class KeyValueColumnizeOperation, method createHistoryEntry:

/**
 * Builds the history entry that turns each distinct value of the key column into a new column
 * and fills those columns with the cells of the value column.
 *
 * <p>Columns other than the key, value and (optional) note column are carried over unchanged.
 * When such columns exist, rows are grouped by the concatenated values of those columns; when
 * none exist, the first key seen marks the start of a new output row each time it repeats.
 * Non-blank notes are written to per-key note columns named
 * {@code "<note column name> : <key>"}, and several notes for the same cell are joined with
 * {@code ";"}. Rows that are still empty at the end are dropped.
 *
 * <p>A note column name that does not resolve to a column is not treated as an error: notes
 * are simply not processed in that case.
 *
 * @param project the project whose columns and rows are read; new cell indices and column
 *     names are allocated through its column model
 * @param historyEntryID the id passed on to the created {@link HistoryEntry}
 * @return a history entry wrapping a {@link MassRowColumnChange} with the new columns and rows
 * @throws Exception if the key column or the value column cannot be found
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
    int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
    int noteColumnIndex = _noteColumnName == null ? -1 : project.columnModel.getColumnIndexByName(_noteColumnName);
    Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
    Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
    Column noteColumn = _noteColumnName == null ? null : project.columnModel.getColumnByName(_noteColumnName);
    // Both columns are dereferenced for every row below; fail with a clear message instead of a
    // bare NullPointerException when a name does not resolve.
    if (keyColumn == null) {
        throw new Exception("No key column named " + _keyColumnName);
    }
    if (valueColumn == null) {
        throw new Exception("No value column named " + _valueColumnName);
    }
    // Every column that is neither key, value nor note is carried over as-is.
    List<Column> unchangedColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    for (int i = 0; i < oldColumns.size(); i++) {
        if (i != keyColumnIndex && i != valueColumnIndex && i != noteColumnIndex) {
            unchangedColumns.add(oldColumns.get(i));
        }
    }
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> newNoteColumns = new ArrayList<Column>();
    Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
    Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
    Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
    List<Row> newRows = new ArrayList<Row>();
    List<Row> oldRows = project.rows;
    Row reusableRow = null;
    List<Row> currentRows = new ArrayList<Row>();
    // key which indicates the start of a record
    String recordKey = null;
    if (unchangedColumns.isEmpty()) {
        reusableRow = new Row(1);
        newRows.add(reusableRow);
        currentRows.clear();
        currentRows.add(reusableRow);
    }
    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Object key = oldRow.getCellValue(keyColumn.getCellIndex());
        if (!ExpressionUtils.isNonBlankData(key)) {
            if (unchangedColumns.isEmpty()) {
                // For degenerate 2 column case (plus optional note column),
                // start a new row when we hit a blank line
                reusableRow = new Row(newColumns.size());
                newRows.add(reusableRow);
                currentRows.clear();
                currentRows.add(reusableRow);
            } else {
                // Copy rows with no key
                newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
            }
            continue;
        }
        String keyString = key.toString();
        // TODO: Add support for processing in record mode instead of just by rows
        // The first key ever seen becomes the record key; seeing it again starts a new row.
        if (keyString.equals(recordKey) || recordKey == null) {
            reusableRow = new Row(newColumns.size());
            newRows.add(reusableRow);
            currentRows.clear();
            currentRows.add(reusableRow);
        }
        Column newColumn = keyValueToColumn.get(keyString);
        if (newColumn == null) {
            // Allocate new column
            newColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(keyString));
            keyValueToColumn.put(keyString, newColumn);
            newColumns.add(newColumn);
            // TODO: make customizable?
            if (recordKey == null) {
                recordKey = keyString;
            }
        }
        /*
         * NOTE: If we have additional columns, we currently merge all rows that
         * have identical values in those columns and then add our new columns.
         */
        if (unchangedColumns.size() > 0) {
            // Grouping key: the unchanged cell values joined with NUL; null values contribute
            // an empty string. The builder is purely local, so no synchronization is needed.
            StringBuilder sb = new StringBuilder();
            for (int c = 0; c < unchangedColumns.size(); c++) {
                Column unchangedColumn = unchangedColumns.get(c);
                Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
                if (c > 0) {
                    sb.append('\0');
                }
                if (cellValue != null) {
                    sb.append(cellValue.toString());
                }
            }
            String unchangedCellValues = sb.toString();
            reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            // NOTE(review): the reuse test reads the new row at the value column's old cell
            // index; confirm that this is the intended "slot already filled" check.
            if (reusableRow == null || reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
                reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }
        }
        Cell cell = oldRow.getCell(valueColumn.getCellIndex());
        if (unchangedColumns.size() == 0) {
            int index = newColumn.getCellIndex();
            // getAvailableRow is defined outside this method; presumably it picks (or adds) a
            // row of the current record whose cell at index is still free. TODO confirm.
            Row row = getAvailableRow(currentRows, newRows, index);
            row.setCell(index, cell);
        } else {
            // TODO: support repeating keys in this mode too
            reusableRow.setCell(newColumn.getCellIndex(), cell);
        }
        if (noteColumn != null) {
            Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
            if (ExpressionUtils.isNonBlankData(noteValue)) {
                Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                if (newNoteColumn == null) {
                    // Allocate new column
                    newNoteColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(noteColumn.getName() + " : " + keyString));
                    keyValueToNoteColumn.put(keyString, newNoteColumn);
                    newNoteColumns.add(newNoteColumn);
                }
                int newNoteCellIndex = newNoteColumn.getCellIndex();
                // NOTE(review): without unchanged columns the value cell goes to the row chosen
                // by getAvailableRow, while the note always goes to reusableRow; confirm these
                // are meant to be the same row.
                Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                    // A note already exists for this key in this row: append with ';'.
                    Cell concatenatedNoteCell = new Cell(existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                    reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                } else {
                    reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                }
            }
        }
    }
    // Final column order: unchanged columns, then key columns, then note columns.
    List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
    allColumns.addAll(newColumns);
    allColumns.addAll(newNoteColumns);
    // clean up the empty rows (iterating backwards so removals do not shift unvisited indices)
    for (int i = newRows.size() - 1; i >= 0; i--) {
        if (newRows.get(i).isEmpty())
            newRows.remove(i);
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(allColumns, newRows));
}
Also used : HashMap(java.util.HashMap) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 3 with MassRowColumnChange

Use of com.google.refine.model.changes.MassRowColumnChange in project OpenRefine by OpenRefine.

In the class TransposeColumnsIntoRowsOperation, method createHistoryEntry:

/**
 * Builds the history entry that transposes a run of columns, starting at the column named
 * {@code _startColumnName}, into rows.
 *
 * <p>The transposed columns are replaced either by a single column named
 * {@code _combinedColumnName} or, when that name is {@code null}, by a key column
 * ({@code _keyColumnName}, holding the original column name) and a value column
 * ({@code _valueColumnName}, holding the original cell). A positive {@code _columnCount}
 * limits the run to that many columns; otherwise every column from the start column to the
 * last one is transposed and the columns after the start column are dropped from the model.
 *
 * <p>Each old row produces at least one new row; every further transposed cell of that row
 * gets a new row of its own. With {@code _fillDown} set, the cells of the non-transposed
 * columns are copied from the first new row into those additional rows.
 *
 * @param project the project whose columns and rows are read
 * @param historyEntryID the id passed on to the created {@link HistoryEntry}
 * @return a history entry wrapping a {@link MassRowColumnChange} with the new columns and rows
 * @throws Exception if a column with one of the new column names already exists
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    if (_combinedColumnName != null) {
        if (project.columnModel.getColumnByName(_combinedColumnName) != null) {
            throw new Exception("Another column already named " + _combinedColumnName);
        }
    } else {
        if (project.columnModel.getColumnByName(_keyColumnName) != null) {
            throw new Exception("Another column already named " + _keyColumnName);
        }
        if (project.columnModel.getColumnByName(_valueColumnName) != null) {
            throw new Exception("Another column already named " + _valueColumnName);
        }
    }
    List<Column> newColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    // Stays at oldColumns.size() when the start column is never found, so that no column
    // position is treated as transposed in the row loop below.
    int startColumnIndex = oldColumns.size();
    int columnCount = _columnCount;
    if (_columnCount > 0) {
        int columnsLeftToTranspose = _columnCount;
        for (int c = 0; c < oldColumns.size(); c++) {
            Column column = oldColumns.get(c);
            if (columnsLeftToTranspose == 0) {
                // This column is beyond the columns to transpose
                appendRetainedColumn(newColumns, column);
            } else if (columnsLeftToTranspose < _columnCount) {
                // This column is a column to transpose, but not the first
                // nothing to do
                columnsLeftToTranspose--;
            } else if (_startColumnName.equals(column.getName())) {
                // This is the first column to transpose
                startColumnIndex = c;
                appendTransposedColumns(newColumns);
                columnsLeftToTranspose--;
            } else {
                // This column is before all columns to transpose
                appendRetainedColumn(newColumns, column);
            }
        }
    } else {
        for (int c = 0; c < oldColumns.size(); c++) {
            Column column = oldColumns.get(c);
            if (_startColumnName.equals(column.getName())) {
                // This is the first column to transpose; everything after it is transposed too
                startColumnIndex = c;
                appendTransposedColumns(newColumns);
                break;
            } else {
                // This column is before all columns to transpose
                appendRetainedColumn(newColumns, column);
            }
        }
        columnCount = oldColumns.size() - startColumnIndex;
    }
    List<Row> oldRows = project.rows;
    // The product is only a capacity hint. It is computed in long arithmetic because
    // rows * columns can exceed the int range, and a wrapped negative value would make the
    // ArrayList constructor throw. On overflow fall back to one new row per old row.
    long estimatedRowCount = (long) oldRows.size() * columnCount;
    int initialCapacity = estimatedRowCount > Integer.MAX_VALUE ? oldRows.size() : (int) estimatedRowCount;
    List<Row> newRows = new ArrayList<Row>(initialCapacity);
    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Row firstNewRow = new Row(newColumns.size());
        int firstNewRowIndex = newRows.size();
        newRows.add(firstNewRow);
        // Number of transposed cells emitted for this old row so far; the first one reuses
        // firstNewRow, every later one gets a row of its own.
        int transposedCells = 0;
        for (int c = 0; c < oldColumns.size(); c++) {
            Column column = oldColumns.get(c);
            Cell cell = oldRow.getCell(column.getCellIndex());
            if (c < startColumnIndex) {
                // Column before the transposed run: same position in the new row.
                firstNewRow.setCell(c, cell);
            } else if (c == startColumnIndex || c < startColumnIndex + columnCount) {
                if (_combinedColumnName != null) {
                    Cell newCell;
                    if (cell == null || cell.value == null) {
                        if (_prependColumnName && !_ignoreBlankCells) {
                            // Blank cell that must be kept: emit just "<column name><separator>".
                            newCell = new Cell(column.getName() + _separator, null);
                        } else {
                            continue;
                        }
                    } else if (_prependColumnName) {
                        newCell = new Cell(column.getName() + _separator + cell.value, null);
                    } else {
                        newCell = cell;
                    }
                    Row rowToModify;
                    if (transposedCells == 0) {
                        rowToModify = firstNewRow;
                    } else {
                        rowToModify = new Row(newColumns.size());
                        newRows.add(rowToModify);
                    }
                    rowToModify.setCell(startColumnIndex, newCell);
                    transposedCells++;
                } else {
                    if (_ignoreBlankCells && (cell == null || cell.value == null)) {
                        continue;
                    }
                    Row rowToModify;
                    if (transposedCells == 0) {
                        rowToModify = firstNewRow;
                    } else {
                        rowToModify = new Row(newColumns.size());
                        newRows.add(rowToModify);
                    }
                    // Key cell holds the original column name, value cell the original cell.
                    rowToModify.setCell(startColumnIndex, new Cell(column.getName(), null));
                    rowToModify.setCell(startColumnIndex + 1, cell);
                    transposedCells++;
                }
            } else {
                // Column after the transposed run: shift left by the number of removed columns
                // and right by the one or two columns that replaced them.
                firstNewRow.setCell(c - columnCount + (_combinedColumnName != null ? 1 : 2), cell);
            }
        }
        if (_fillDown) {
            // Copy the non-transposed cells of the first new row into the extra rows created
            // for this old row, without overwriting cells that are already set.
            for (int r2 = firstNewRowIndex + 1; r2 < newRows.size(); r2++) {
                Row newRow = newRows.get(r2);
                for (int c = 0; c < newColumns.size(); c++) {
                    if (c < startColumnIndex || (_combinedColumnName != null ? c > startColumnIndex : c > startColumnIndex + 1)) {
                        Column column = newColumns.get(c);
                        int cellIndex = column.getCellIndex();
                        Cell cellToCopy = firstNewRow.getCell(cellIndex);
                        if (cellToCopy != null && newRow.getCell(cellIndex) == null) {
                            newRow.setCell(cellIndex, cellToCopy);
                        }
                    }
                }
            }
        }
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(), this, new MassRowColumnChange(newColumns, newRows));
}

/**
 * Appends a copy of a column that is not transposed, using the next free position in
 * {@code newColumns} as its cell index and keeping its original header label and name.
 */
private static void appendRetainedColumn(List<Column> newColumns, Column column) {
    Column newColumn = new Column(newColumns.size(), column.getOriginalHeaderLabel());
    newColumn.setName(column.getName());
    newColumns.add(newColumn);
}

/**
 * Appends the column(s) that replace the transposed run: the combined column when
 * {@code _combinedColumnName} is set, otherwise the key column followed by the value column.
 */
private void appendTransposedColumns(List<Column> newColumns) {
    if (_combinedColumnName != null) {
        newColumns.add(new Column(newColumns.size(), _combinedColumnName));
    } else {
        newColumns.add(new Column(newColumns.size(), _keyColumnName));
        newColumns.add(new Column(newColumns.size(), _valueColumnName));
    }
}
Also used : Column(com.google.refine.model.Column) MassRowColumnChange(com.google.refine.model.changes.MassRowColumnChange) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell) JSONException(org.json.JSONException)

Aggregations

HistoryEntry (com.google.refine.history.HistoryEntry)3 Cell (com.google.refine.model.Cell)3 Column (com.google.refine.model.Column)3 Row (com.google.refine.model.Row)3 MassRowColumnChange (com.google.refine.model.changes.MassRowColumnChange)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)1 JSONException (org.json.JSONException)1 JSONObject (org.json.JSONObject)1