Search in sources :

Example 1 with ColumnSplitChange

Use of com.google.refine.model.changes.ColumnSplitChange in the OpenRefine project (by OpenRefine).

From the class ColumnSplitOperation, the method createHistoryEntry.

/**
 * Builds the history entry describing a split of the column named by {@code _columnName}.
 *
 * <p>One of three splitting strategies is chosen and wrapped in a {@code ColumnSplitRowVisitor}:
 * <ul>
 *   <li>{@code _mode} equal to {@code "lengths"}: cut the string into consecutive pieces whose
 *       sizes come from {@code _fieldLengths};</li>
 *   <li>{@code _regex} set: split on {@code _separator} compiled as a regular expression;</li>
 *   <li>otherwise: split on {@code _separator} taken as a literal string.</li>
 * </ul>
 * The visitor is run over the rows returned by the engine's {@code getAllFilteredRows()}, and the
 * three lists handed to it are then passed on to a {@code ColumnSplitChange}.
 *
 * @param project        project whose column model and rows are read
 * @param historyEntryID identifier forwarded to the {@code HistoryEntry} constructor
 * @return a history entry carrying the description and the {@code ColumnSplitChange}
 * @throws Exception if the project has no column named {@code _columnName}
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);

    Column sourceColumn = project.columnModel.getColumnByName(_columnName);
    if (sourceColumn == null) {
        throw new Exception("No column named " + _columnName);
    }

    // Output lists: handed to the visitor below and then to the ColumnSplitChange.
    // NOTE(review): presumably the visitor fills them while visiting rows - confirm
    // against ColumnSplitRowVisitor.
    List<String> newColumnNames = new ArrayList<String>();
    List<Integer> affectedRows = new ArrayList<Integer>(project.rows.size());
    List<List<Serializable>> splitValues = new ArrayList<List<Serializable>>(project.rows.size());

    FilteredRows rowsToVisit = engine.getAllFilteredRows();

    RowVisitor splitter;
    if ("lengths".equals(_mode)) {
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, affectedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                List<Serializable> pieces = new ArrayList<Serializable>(_fieldLengths.length + 1);
                int cursor = 0;
                for (int i = 0; i < _fieldLengths.length; i++) {
                    // Clamp to the end of the string so short inputs yield empty trailing pieces.
                    int end = Math.min(cursor + _fieldLengths[i], s.length());
                    pieces.add(stringToValue(s.substring(cursor, end)));
                    cursor = end;
                }
                return pieces;
            }
        };
    } else if (_regex) {
        // Compiled once up front; the anonymous visitor captures it directly.
        final Pattern separatorPattern = Pattern.compile(_separator);
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, affectedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = separatorPattern.split(s, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    } else {
        splitter = new ColumnSplitRowVisitor(sourceColumn.getCellIndex(), newColumnNames, affectedRows, splitValues) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    }

    rowsToVisit.accept(project, splitter);

    String howSplit;
    if ("separator".equals(_mode)) {
        howSplit = " by separator";
    } else {
        howSplit = " by field lengths";
    }
    String description = "Split " + affectedRows.size() + " cell(s) in column " + _columnName
            + " into several columns" + howSplit;

    Change change = new ColumnSplitChange(_columnName, newColumnNames, affectedRows, splitValues, _removeOriginalColumn);
    return new HistoryEntry(historyEntryID, project, description, this, change);
}
Also used : Pattern(java.util.regex.Pattern) Serializable(java.io.Serializable) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) ArrayList(java.util.ArrayList) Change(com.google.refine.history.Change) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) FilteredRows(com.google.refine.browsing.FilteredRows) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) ArrayList(java.util.ArrayList) List(java.util.List) RowVisitor(com.google.refine.browsing.RowVisitor) Engine(com.google.refine.browsing.Engine)

Aggregations

Engine (com.google.refine.browsing.Engine)1 FilteredRows (com.google.refine.browsing.FilteredRows)1 RowVisitor (com.google.refine.browsing.RowVisitor)1 Change (com.google.refine.history.Change)1 HistoryEntry (com.google.refine.history.HistoryEntry)1 Column (com.google.refine.model.Column)1 ColumnSplitChange (com.google.refine.model.changes.ColumnSplitChange)1 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Pattern (java.util.regex.Pattern)1