use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.
the class KeyValueColumnizeOperation method createHistoryEntry.
/**
 * Builds the history entry that turns a key column / value column pair (plus an
 * optional note column) into one new column per distinct key.
 *
 * <p>Columns other than the key, value and note columns are treated as "unchanged"
 * columns. When there are none, values are laid out row by row and a new output row
 * is started on a blank key or when the record key repeats. When there are some,
 * source rows with identical values in all unchanged columns are merged into a
 * shared output row.
 *
 * @param project the project whose rows and column model are read; new cell indices
 *        are allocated from its column model
 * @param historyEntryID the id given to the returned history entry
 * @return a history entry wrapping a {@code MassRowColumnChange} with the new columns and rows
 * @throws Exception if the key column or the value column cannot be found by name
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
    int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
    int noteColumnIndex = _noteColumnName == null ? -1 : project.columnModel.getColumnIndexByName(_noteColumnName);
    Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
    if (keyColumn == null) {
        // Fail with a readable message instead of a NullPointerException inside the row loop.
        throw new Exception("No key column named " + _keyColumnName);
    }
    Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
    if (valueColumn == null) {
        throw new Exception("No value column named " + _valueColumnName);
    }
    // The note column is optional: when it is not configured or not found, notes are simply skipped.
    Column noteColumn = _noteColumnName == null ? null : project.columnModel.getColumnByName(_noteColumnName);

    // Every column that is not the key, value or note column is carried over as is.
    List<Column> unchangedColumns = new ArrayList<Column>();
    List<Column> oldColumns = project.columnModel.columns;
    for (int i = 0; i < oldColumns.size(); i++) {
        if (i != keyColumnIndex && i != valueColumnIndex && i != noteColumnIndex) {
            unchangedColumns.add(oldColumns.get(i));
        }
    }

    List<Column> newColumns = new ArrayList<Column>();
    List<Column> newNoteColumns = new ArrayList<Column>();
    Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
    Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
    Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
    List<Row> newRows = new ArrayList<Row>();
    List<Row> oldRows = project.rows;
    Row reusableRow = null;
    List<Row> currentRows = new ArrayList<Row>();
    // key which indicates the start of a record
    String recordKey = null;
    if (unchangedColumns.isEmpty()) {
        // Degenerate case: seed the output with a first row to fill.
        reusableRow = new Row(1);
        newRows.add(reusableRow);
        currentRows.clear();
        currentRows.add(reusableRow);
    }

    for (int r = 0; r < oldRows.size(); r++) {
        Row oldRow = oldRows.get(r);
        Object key = oldRow.getCellValue(keyColumn.getCellIndex());
        if (!ExpressionUtils.isNonBlankData(key)) {
            if (unchangedColumns.isEmpty()) {
                // For degenerate 2 column case (plus optional note column),
                // start a new row when we hit a blank line
                reusableRow = new Row(newColumns.size());
                newRows.add(reusableRow);
                currentRows.clear();
                currentRows.add(reusableRow);
            } else {
                // Copy rows with no key
                newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
            }
            continue;
        }

        String keyString = key.toString();
        // TODO: Add support for processing in record mode instead of just by rows
        // A new output row starts when the record key repeats, or before any record key is known.
        if (keyString.equals(recordKey) || recordKey == null) {
            reusableRow = new Row(newColumns.size());
            newRows.add(reusableRow);
            currentRows.clear();
            currentRows.add(reusableRow);
        }

        Column newColumn = keyValueToColumn.get(keyString);
        if (newColumn == null) {
            // Allocate new column
            newColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(keyString));
            keyValueToColumn.put(keyString, newColumn);
            newColumns.add(newColumn);
            // TODO: make customizable?
            // The first key seen becomes the record key.
            if (recordKey == null) {
                recordKey = keyString;
            }
        }

        /*
         * NOTE: If we have additional columns, we currently merge all rows that
         * have identical values in those columns and then add our new columns.
         */
        if (unchangedColumns.size() > 0) {
            // Grouping key: the unchanged cell values joined with NUL; null cells contribute nothing.
            StringBuilder sb = new StringBuilder();
            for (int c = 0; c < unchangedColumns.size(); c++) {
                Column unchangedColumn = unchangedColumns.get(c);
                Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
                if (c > 0) {
                    sb.append('\0');
                }
                if (cellValue != null) {
                    sb.append(cellValue.toString());
                }
            }
            String unchangedCellValues = sb.toString();
            reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            // Start a fresh row when the group has none yet, or when the existing row
            // already holds a value at the value column's cell index.
            if (reusableRow == null || reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
                reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }
        }

        Cell cell = oldRow.getCell(valueColumn.getCellIndex());
        if (unchangedColumns.size() == 0) {
            int index = newColumn.getCellIndex();
            // getAvailableRow is defined elsewhere; presumably it returns a row from currentRows
            // whose cell at `index` is still free, adding one if needed - TODO confirm.
            Row row = getAvailableRow(currentRows, newRows, index);
            row.setCell(index, cell);
        } else {
            // TODO: support repeating keys in this mode too
            reusableRow.setCell(newColumn.getCellIndex(), cell);
        }

        if (noteColumn != null) {
            Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
            if (ExpressionUtils.isNonBlankData(noteValue)) {
                Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                if (newNoteColumn == null) {
                    // Allocate new column
                    newNoteColumn = new Column(project.columnModel.allocateNewCellIndex(), project.columnModel.getUnduplicatedColumnName(noteColumn.getName() + " : " + keyString));
                    keyValueToNoteColumn.put(keyString, newNoteColumn);
                    newNoteColumns.add(newNoteColumn);
                }
                int newNoteCellIndex = newNoteColumn.getCellIndex();
                // NOTE(review): the note is written to reusableRow, which in the degenerate case
                // may differ from the row chosen by getAvailableRow above - confirm this is intended.
                Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                    // Several notes for the same key in one row are joined with ';'.
                    Cell concatenatedNoteCell = new Cell(existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                    reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                } else {
                    reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                }
            }
        }
    }

    // Final column order: unchanged columns, then key-derived columns, then note columns.
    List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
    allColumns.addAll(newColumns);
    allColumns.addAll(newNoteColumns);
    // clean up the empty rows (iterate backwards so removals do not shift pending indices)
    for (int i = newRows.size() - 1; i >= 0; i--) {
        if (newRows.get(i).isEmpty()) {
            newRows.remove(i);
        }
    }
    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowColumnChange(allColumns, newRows));
}
use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.
the class ColumnSplitOperation method createHistoryEntry.
/**
 * Creates the history entry for splitting the cells of one column into several columns.
 *
 * <p>One of three splitting strategies is chosen: fixed field lengths when the mode is
 * {@code "lengths"}, a regular expression when {@code _regex} is set, and a literal
 * separator otherwise. The chosen visitor is run over all filtered rows and collects
 * the new column names, the affected row indices and the split values.
 *
 * @param project the project whose rows are split
 * @param historyEntryID the id given to the returned history entry
 * @return a history entry wrapping a {@code ColumnSplitChange}
 * @throws Exception if no column named {@code _columnName} exists
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);

    Column sourceColumn = project.columnModel.getColumnByName(_columnName);
    if (sourceColumn == null) {
        throw new Exception("No column named " + _columnName);
    }

    // Accumulators filled in by the row visitor.
    List<String> newColumnNames = new ArrayList<String>();
    List<Integer> splitRowIndices = new ArrayList<Integer>(project.rows.size());
    List<List<Serializable>> splitValues = new ArrayList<List<Serializable>>(project.rows.size());

    FilteredRows rowsToVisit = engine.getAllFilteredRows();
    int sourceCellIndex = sourceColumn.getCellIndex();

    RowVisitor splitter;
    if ("lengths".equals(_mode)) {
        // Fixed-width fields: consecutive slices, each clamped to the end of the string.
        splitter = new ColumnSplitRowVisitor(sourceCellIndex, newColumnNames, splitRowIndices, splitValues) {
            @Override
            protected List<Serializable> split(String s) {
                List<Serializable> pieces = new ArrayList<Serializable>(_fieldLengths.length + 1);
                int start = 0;
                for (int i = 0; i < _fieldLengths.length; i++) {
                    int end = Math.min(start + _fieldLengths[i], s.length());
                    pieces.add(stringToValue(s.substring(start, end)));
                    start = end;
                }
                return pieces;
            }
        };
    } else if (_regex) {
        // Compile once; the anonymous visitor captures the compiled pattern.
        final Pattern separatorPattern = Pattern.compile(_separator);
        splitter = new ColumnSplitRowVisitor(sourceCellIndex, newColumnNames, splitRowIndices, splitValues) {
            @Override
            protected List<Serializable> split(String s) {
                return stringArrayToValueList(separatorPattern.split(s, _maxColumns));
            }
        };
    } else {
        // Literal separator, keeping empty tokens.
        splitter = new ColumnSplitRowVisitor(sourceCellIndex, newColumnNames, splitRowIndices, splitValues) {
            @Override
            protected List<Serializable> split(String s) {
                return stringArrayToValueList(StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns));
            }
        };
    }

    rowsToVisit.accept(project, splitter);

    // Any mode other than "separator" is described as "by field lengths".
    String splitKind = "separator".equals(_mode) ? " by separator" : " by field lengths";
    String description = "Split " + splitRowIndices.size() + " cell(s) in column " + _columnName + " into several columns" + splitKind;

    Change change = new ColumnSplitChange(_columnName, newColumnNames, splitRowIndices, splitValues, _removeOriginalColumn);
    return new HistoryEntry(historyEntryID, project, description, this, change);
}
use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.
the class ReconCopyAcrossColumnsOperation method createHistoryEntry.
/**
 * Creates the history entry that copies reconciliation data from one column to cells
 * with the same value in other columns.
 *
 * <p>Two passes are made over the filtered rows. The first collects, per cell value of
 * the source column, a recon whose judgment is one of the configured judgments (a later
 * row overwrites an earlier one for the same value). The second records a cell change
 * for every target cell whose value has a collected recon, skipping cells that are
 * already judged unless {@code _applyToJudgedCells} is set.
 *
 * <p>Target column names that do not resolve to a column are ignored. If the source
 * column is missing or no target column resolves, the entry contains no cell changes.
 *
 * @param project the project whose rows are scanned
 * @param historyEntryID the id given to the returned history entry
 * @return a history entry wrapping a {@code MassChange} of the collected cell changes
 * @throws Exception if scanning the rows fails; failures are propagated rather than
 *         logged so that a partially collected change list is never recorded
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);

    // Resolve target columns, silently dropping names that do not exist.
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }

    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }

    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && toColumns.size() > 0) {
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();

        // Pass 1: remember the recon attached to each value in the source column.
        filteredRows.accept(project, new RowVisitor() {
            @Override
            public void start(Project project) {
                // nothing to do
            }

            @Override
            public void end(Project project) {
                // nothing to do
            }

            @Override
            public boolean visit(Project project, int rowIndex, Row row) {
                Cell cell = row.getCell(fromColumn.getCellIndex());
                if (cell != null && cell.value != null && cell.recon != null) {
                    if (judgments.contains(cell.recon.judgment)) {
                        cellValueToRecon.put(cell.value, cell.recon);
                    }
                }
                return false;
            }
        });

        // Pass 2: build a change for each matching cell in the target columns.
        filteredRows.accept(project, new RowVisitor() {
            @Override
            public void start(Project project) {
                // nothing to do
            }

            @Override
            public void end(Project project) {
                // nothing to do
            }

            @Override
            public boolean visit(Project project, int rowIndex, Row row) {
                for (Column column : toColumns) {
                    int cellIndex = column.getCellIndex();
                    Cell cell = row.getCell(cellIndex);
                    if (cell != null && cell.value != null) {
                        Recon reconToCopy = cellValueToRecon.get(cell.value);
                        boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                        if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                            Cell newCell = new Cell(cell.value, reconToCopy);
                            CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                            cellChanges.add(cellChange);
                        }
                    }
                }
                return false;
            }
        });
    }

    // Join the target names with a separator; the single-argument join would run them together.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.
the class DenormalizeOperation method createHistoryEntry.
/**
 * Creates the history entry that fills each row's dependent cells from the rows they
 * depend on.
 *
 * <p>For every row with cell dependencies, a duplicate of the row is made and, for each
 * non-null dependency whose row index lies within the project's rows, the referenced
 * cell is copied into the duplicate at the same cell index. Rows without cell
 * dependencies are passed through unchanged.
 *
 * @param project the project whose rows and record model are read
 * @param historyEntryID the id given to the returned history entry
 * @return a history entry wrapping a {@code MassRowChange} with the resulting rows
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    List<Row> sourceRows = project.rows;
    List<Row> resultRows = new ArrayList<Row>();

    for (int rowIndex = 0; rowIndex < sourceRows.size(); rowIndex++) {
        Row sourceRow = sourceRows.get(rowIndex);
        RowDependency dependency = project.recordModel.getRowDependency(rowIndex);

        Row filledRow = null;
        if (dependency.cellDependencies != null) {
            filledRow = sourceRow.dup();
            for (CellDependency cellDependency : dependency.cellDependencies) {
                if (cellDependency == null) {
                    continue;
                }
                int contextRowIndex = cellDependency.rowIndex;
                int contextCellIndex = cellDependency.cellIndex;
                // Ignore dependencies that point outside the current rows.
                if (contextRowIndex < 0 || contextRowIndex >= sourceRows.size()) {
                    continue;
                }
                Cell contextCell = sourceRows.get(contextRowIndex).getCell(contextCellIndex);
                filledRow.setCell(contextCellIndex, contextCell);
            }
        }

        if (filledRow != null) {
            resultRows.add(filledRow);
        } else {
            resultRows.add(sourceRow);
        }
    }

    MassRowChange change = new MassRowChange(resultRows);
    return new HistoryEntry(historyEntryID, project, getBriefDescription(project), DenormalizeOperation.this, change);
}
use of com.google.refine.history.HistoryEntry in project OpenRefine by OpenRefine.
the class RowFlagOperation method createHistoryEntry.
/**
 * Creates the history entry that flags or unflags the filtered rows.
 *
 * <p>The visitor from {@code createRowVisitor} is run over all filtered rows and adds
 * its changes to a list; the description reports how many changes were collected.
 *
 * @param project the project whose filtered rows are visited
 * @param historyEntryID the id given to the returned history entry
 * @return a history entry wrapping a {@code MassChange} of the collected changes
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    List<Change> rowChanges = new ArrayList<Change>(project.rows.size());

    engine.getAllFilteredRows().accept(project, createRowVisitor(project, rowChanges));

    String verb = _flagged ? "Flag" : "Unflag";
    String description = verb + " " + rowChanges.size() + " rows";
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(rowChanges, false));
}
Aggregations