Search in sources:

Example 6 with FilteredRows

use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.

the class ColumnSplitOperation method createHistoryEntry.

/**
 * Builds the history entry describing a split of the cells in column {@code _columnName}.
 *
 * <p>One of three splitting strategies is chosen: fixed field lengths when {@code _mode} is
 * {@code "lengths"}, a regular expression when {@code _regex} is set, and a literal separator
 * otherwise. The chosen visitor is run over the rows selected by the engine and fills
 * {@code columnNames}, {@code rowIndices} and {@code tuples}, which are then handed to a
 * {@link ColumnSplitChange}.
 *
 * @param project        the project whose rows are visited
 * @param historyEntryID the identifier given to the resulting history entry
 * @return the history entry wrapping the column-split change
 * @throws Exception if the project has no column named {@code _columnName}
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);

    Column column = project.columnModel.getColumnByName(_columnName);
    if (column == null) {
        throw new Exception("No column named " + _columnName);
    }

    // Output collections shared with (and populated by) the row visitor.
    List<String> columnNames = new ArrayList<String>();
    List<Integer> rowIndices = new ArrayList<Integer>(project.rows.size());
    List<List<Serializable>> tuples = new ArrayList<List<Serializable>>(project.rows.size());

    FilteredRows filteredRows = engine.getAllFilteredRows();

    RowVisitor rowVisitor;
    if ("lengths".equals(_mode)) {
        // Cut the string into consecutive slices of the configured lengths.
        rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {

            @Override
            protected List<Serializable> split(String s) {
                List<Serializable> pieces = new ArrayList<Serializable>(_fieldLengths.length + 1);
                int cursor = 0;
                for (int i = 0; i < _fieldLengths.length; i++) {
                    // Clamp the slice end so short strings yield (possibly empty) trailing pieces.
                    int end = Math.min(cursor + _fieldLengths[i], s.length());
                    pieces.add(stringToValue(s.substring(cursor, end)));
                    cursor = end;
                }
                return pieces;
            }
        };
    } else if (_regex) {
        // Compile once, outside the visitor, and capture the compiled pattern.
        final Pattern compiledSeparator = Pattern.compile(_separator);
        rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = compiledSeparator.split(s, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    } else {
        // Literal separator; empty tokens between adjacent separators are preserved.
        rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {

            @Override
            protected List<Serializable> split(String s) {
                String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns);
                return stringArrayToValueList(parts);
            }
        };
    }

    filteredRows.accept(project, rowVisitor);

    String howSplit;
    if ("separator".equals(_mode)) {
        howSplit = " by separator";
    } else {
        howSplit = " by field lengths";
    }
    String description = "Split " + rowIndices.size() + " cell(s) in column " + _columnName
            + " into several columns" + howSplit;

    Change change = new ColumnSplitChange(_columnName, columnNames, rowIndices, tuples, _removeOriginalColumn);
    return new HistoryEntry(historyEntryID, project, description, this, change);
}
Also used : Pattern(java.util.regex.Pattern) Serializable(java.io.Serializable) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) ArrayList(java.util.ArrayList) Change(com.google.refine.history.Change) ColumnSplitChange(com.google.refine.model.changes.ColumnSplitChange) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) ArrayList(java.util.ArrayList) List(java.util.List) RowVisitor(com.google.refine.browsing.RowVisitor) Engine(com.google.refine.browsing.Engine)

Example 7 with FilteredRows

use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.

the class ReconCopyAcrossColumnsOperation method createHistoryEntry.

/**
 * Builds the history entry that copies recon data from column {@code _fromColumnName} to the
 * columns named in {@code _toColumnNames}.
 *
 * <p>The rows selected by the engine are traversed twice. The first pass maps each cell value in
 * the source column to its recon, keeping only recons whose judgment is one of
 * {@code _judgments}; when a value occurs more than once the last recon seen wins. The second
 * pass looks at every target-column cell with a non-null value and, if a recon was recorded for
 * that value, queues a {@link CellChange} attaching it. Cells that already carry a judgment other
 * than {@code Judgment.None} are left alone unless {@code _applyToJudgedCells} is set.
 *
 * <p>Target column names that do not resolve to a column are skipped. If the source column is
 * missing or no target column resolves, the resulting change is empty.
 *
 * @param project        the project whose rows are visited
 * @param historyEntryID the identifier given to the resulting history entry
 * @return the history entry wrapping a {@link MassChange} of the queued cell changes
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);

    // Resolve the target columns, silently dropping names that do not exist.
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }

    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }

    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && !toColumns.isEmpty()) {
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: collect value -> recon from the source column.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromColumn.getCellIndex());
                    if (cell != null && cell.value != null && cell.recon != null
                            && judgments.contains(cell.recon.judgment)) {
                        cellValueToRecon.put(cell.value, cell.recon);
                    }
                    // presumably false means "keep visiting" - confirm against RowVisitor
                    return false;
                }
            });

            // Pass 2: queue a change for every target cell whose value has a recorded recon.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                    // nothing to do
                }

                @Override
                public void end(Project project) {
                    // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // NOTE(review): a failure here is only printed, and the entry is still created from
            // whatever changes were queued before the failure. Kept as-is because this may be
            // deliberate best-effort; consider logging through the project logger or rethrowing.
            e.printStackTrace();
        }
    }

    // Join with an explicit separator: the one-argument StringUtils.join concatenates the
    // names with nothing between them, which made the description unreadable.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName
            + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Example 8 with FilteredRows

use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.

the class RowFlagOperation method createHistoryEntry.

/**
 * Builds the history entry for flagging or unflagging the rows selected by the engine.
 *
 * <p>The visitor obtained from {@code createRowVisitor} is run over the filtered rows and
 * fills {@code changes}; the number of collected changes is reported in the description, which
 * starts with "Flag" or "Unflag" depending on {@code _flagged}.
 *
 * @param project        the project whose rows are visited
 * @param historyEntryID the identifier given to the resulting history entry
 * @return the history entry wrapping a {@link MassChange} of the collected changes
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    List<Change> changes = new ArrayList<Change>(project.rows.size());

    // The visitor appends to `changes` while the filtered rows are traversed.
    engine.getAllFilteredRows().accept(project, createRowVisitor(project, changes));

    String verb;
    if (_flagged) {
        verb = "Flag";
    } else {
        verb = "Unflag";
    }
    String description = verb + " " + changes.size() + " rows";

    Change massChange = new MassChange(changes, false);
    return new HistoryEntry(historyEntryID, project, description, this, massChange);
}
Also used : MassChange(com.google.refine.model.changes.MassChange) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) RowFlagChange(com.google.refine.model.changes.RowFlagChange) Change(com.google.refine.history.Change) MassChange(com.google.refine.model.changes.MassChange) FilteredRows(com.google.refine.browsing.FilteredRows) Engine(com.google.refine.browsing.Engine)

Example 9 with FilteredRows

use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.

the class GetScatterplotCommand method draw.

/**
 * Renders a scatterplot as a PNG image to {@code output}.
 *
 * <p>Rendering options are read from {@code o} using the {@link ScatterplotFacet} keys; each
 * optional key falls back to the default shown in the code. The x and y column names are
 * required. A plot is drawn only when both columns exist and both of their bin indexes report
 * numeric data; otherwise a 1x1 placeholder image is written.
 *
 * <p>When a base color is configured, all rows are drawn first in the base color and the
 * filtered rows are then drawn on top in the main color.
 *
 * @param output  the stream the PNG is written to
 * @param project the project supplying columns and rows
 * @param engine  the engine supplying the full and the filtered row sets
 * @param o       the scatterplot configuration
 * @throws IOException   if writing the image fails
 * @throws JSONException if a required key is missing from {@code o} or a value has the wrong type
 */
public void draw(OutputStream output, Project project, Engine engine, JSONObject o) throws IOException, JSONException {
    int size = o.has(ScatterplotFacet.SIZE) ? o.getInt(ScatterplotFacet.SIZE) : 100;
    double dot = o.has(ScatterplotFacet.DOT) ? o.getDouble(ScatterplotFacet.DOT) : 100;
    int dim_x = o.has(ScatterplotFacet.DIM_X) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_X)) : ScatterplotFacet.LIN;
    int dim_y = o.has(ScatterplotFacet.DIM_Y) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_Y)) : ScatterplotFacet.LIN;
    int rotation = o.has(ScatterplotFacet.ROTATION) ? ScatterplotFacet.getRotation(o.getString(ScatterplotFacet.ROTATION)) : ScatterplotFacet.NO_ROTATION;

    // Colors are given as hexadecimal RGB strings.
    String color_str = o.has(ScatterplotFacet.COLOR) ? o.getString(ScatterplotFacet.COLOR) : "000000";
    Color color = new Color(Integer.parseInt(color_str, 16));
    String base_color_str = o.has(ScatterplotFacet.BASE_COLOR) ? o.getString(ScatterplotFacet.BASE_COLOR) : null;
    Color base_color = base_color_str != null ? new Color(Integer.parseInt(base_color_str, 16)) : null;

    // NOTE(review): when parsing fails the evaluator stays null and is still passed to
    // getBinIndex below - confirm that getBinIndex tolerates a null evaluator.
    String columnName_x = o.getString(ScatterplotFacet.X_COLUMN_NAME);
    String expression_x = o.has(ScatterplotFacet.X_EXPRESSION) ? o.getString(ScatterplotFacet.X_EXPRESSION) : "value";
    Evaluable eval_x = null;
    try {
        eval_x = MetaParser.parse(expression_x);
    } catch (ParsingException e) {
        logger.warn("error parsing expression", e);
    }

    String columnName_y = o.getString(ScatterplotFacet.Y_COLUMN_NAME);
    String expression_y = o.has(ScatterplotFacet.Y_EXPRESSION) ? o.getString(ScatterplotFacet.Y_EXPRESSION) : "value";
    Evaluable eval_y = null;
    try {
        eval_y = MetaParser.parse(expression_y);
    } catch (ParsingException e) {
        logger.warn("error parsing expression", e);
    }

    // Each column is looked up once. The cell index and value range are only meaningful when
    // the column exists, and they are only used below when both bin indexes are available.
    int columnIndex_x = 0;
    double min_x = 0;
    double max_x = 0;
    NumericBinIndex index_x = null;
    Column column_x = project.columnModel.getColumnByName(columnName_x);
    if (column_x != null) {
        columnIndex_x = column_x.getCellIndex();
        index_x = ScatterplotFacet.getBinIndex(project, column_x, eval_x, expression_x);
        min_x = index_x.getMin();
        max_x = index_x.getMax();
    }

    int columnIndex_y = 0;
    double min_y = 0;
    double max_y = 0;
    NumericBinIndex index_y = null;
    Column column_y = project.columnModel.getColumnByName(columnName_y);
    if (column_y != null) {
        columnIndex_y = column_y.getCellIndex();
        index_y = ScatterplotFacet.getBinIndex(project, column_y, eval_y, expression_y);
        min_y = index_y.getMin();
        max_y = index_y.getMax();
    }

    if (index_x != null && index_y != null && index_x.isNumeric() && index_y.isNumeric()) {
        ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor(columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, size, dim_x, dim_y, rotation, dot, color);
        if (base_color != null) {
            // Background layer: every row, in the base color.
            drawer.setColor(base_color);
            engine.getAllRows().accept(project, drawer);
            drawer.setColor(color);
        }
        // Foreground layer: only the rows that pass the current filters.
        engine.getAllFilteredRows().accept(project, drawer);
        ImageIO.write(drawer.getImage(), "png", output);
    } else {
        // Nothing plottable: emit a 1x1 placeholder image.
        ImageIO.write(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR), "png", output);
    }
}
Also used : Evaluable(com.google.refine.expr.Evaluable) Column(com.google.refine.model.Column) NumericBinIndex(com.google.refine.browsing.util.NumericBinIndex) Color(java.awt.Color) ParsingException(com.google.refine.expr.ParsingException) ScatterplotDrawingRowVisitor(com.google.refine.browsing.facets.ScatterplotDrawingRowVisitor) FilteredRows(com.google.refine.browsing.FilteredRows) BufferedImage(java.awt.image.BufferedImage)

Example 10 with FilteredRows

use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.

the class kNNClusterer method computeClusters.

/**
 * Computes clusters over the rows selected by {@code engine} and stores them in
 * {@code _clusters}.
 *
 * <p>A {@link BlockingClusteringRowVisitor} built from {@code _distance} and {@code _config}
 * is run over the filtered rows of {@code _project}; the clusters it reports become the result.
 *
 * @param engine the engine supplying the filtered rows to cluster
 */
@Override
public void computeClusters(Engine engine) {
    // Disabled alternative kept from the original code:
    // VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config);
    final BlockingClusteringRowVisitor blockingVisitor = new BlockingClusteringRowVisitor(_distance, _config);
    engine.getAllFilteredRows().accept(_project, blockingVisitor);
    _clusters = blockingVisitor.getClusters();
}
Also used : FilteredRows(com.google.refine.browsing.FilteredRows)

Aggregations

FilteredRows (com.google.refine.browsing.FilteredRows): 13, Engine (com.google.refine.browsing.Engine): 8, ArrayList (java.util.ArrayList): 8, HistoryEntry (com.google.refine.history.HistoryEntry): 7, Column (com.google.refine.model.Column): 6, JSONException (org.json.JSONException): 6, RowVisitor (com.google.refine.browsing.RowVisitor): 5, Change (com.google.refine.history.Change): 4, JSONObject (org.json.JSONObject): 4, Project (com.google.refine.model.Project): 3, MassChange (com.google.refine.model.changes.MassChange): 3, HashMap (java.util.HashMap): 3, FilteredRecords (com.google.refine.browsing.FilteredRecords): 2, RecordVisitor (com.google.refine.browsing.RecordVisitor): 2, ParsingException (com.google.refine.expr.ParsingException): 2, Row (com.google.refine.model.Row): 2, CellChange (com.google.refine.model.changes.CellChange): 2, SortingRecordVisitor (com.google.refine.sorting.SortingRecordVisitor): 2, SortingRowVisitor (com.google.refine.sorting.SortingRowVisitor): 2, IOException (java.io.IOException): 2