use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.
the class ColumnSplitOperation method createHistoryEntry.
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
Engine engine = createEngine(project);
Column column = project.columnModel.getColumnByName(_columnName);
if (column == null) {
throw new Exception("No column named " + _columnName);
}
List<String> columnNames = new ArrayList<String>();
List<Integer> rowIndices = new ArrayList<Integer>(project.rows.size());
List<List<Serializable>> tuples = new ArrayList<List<Serializable>>(project.rows.size());
FilteredRows filteredRows = engine.getAllFilteredRows();
RowVisitor rowVisitor;
if ("lengths".equals(_mode)) {
rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {
@Override
protected java.util.List<Serializable> split(String s) {
List<Serializable> results = new ArrayList<Serializable>(_fieldLengths.length + 1);
int lastIndex = 0;
for (int length : _fieldLengths) {
int from = lastIndex;
int to = Math.min(from + length, s.length());
results.add(stringToValue(s.substring(from, to)));
lastIndex = to;
}
return results;
}
;
};
} else if (_regex) {
Pattern pattern = Pattern.compile(_separator);
rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {
Pattern _pattern;
@Override
protected java.util.List<Serializable> split(String s) {
return stringArrayToValueList(_pattern.split(s, _maxColumns));
}
;
public RowVisitor init(Pattern pattern) {
_pattern = pattern;
return this;
}
}.init(pattern);
} else {
rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) {
@Override
protected java.util.List<Serializable> split(String s) {
return stringArrayToValueList(StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns));
}
;
};
}
filteredRows.accept(project, rowVisitor);
String description = "Split " + rowIndices.size() + " cell(s) in column " + _columnName + " into several columns" + ("separator".equals(_mode) ? " by separator" : " by field lengths");
Change change = new ColumnSplitChange(_columnName, columnNames, rowIndices, tuples, _removeOriginalColumn);
return new HistoryEntry(historyEntryID, project, description, this, change);
}
use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.
the class ReconCopyAcrossColumnsOperation method createHistoryEntry.
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
Engine engine = createEngine(project);
final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);
final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
for (String c : _toColumnNames) {
Column toColumn = project.columnModel.getColumnByName(c);
if (toColumn != null) {
toColumns.add(toColumn);
}
}
final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
for (String j : _judgments) {
judgments.add(Recon.stringToJudgment(j));
}
final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
if (fromColumn != null && toColumns.size() > 0) {
final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
FilteredRows filteredRows = engine.getAllFilteredRows();
try {
filteredRows.accept(project, new RowVisitor() {
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
Cell cell = row.getCell(fromColumn.getCellIndex());
if (cell != null && cell.value != null && cell.recon != null) {
if (judgments.contains(cell.recon.judgment)) {
cellValueToRecon.put(cell.value, cell.recon);
}
}
return false;
}
});
filteredRows.accept(project, new RowVisitor() {
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
for (Column column : toColumns) {
int cellIndex = column.getCellIndex();
Cell cell = row.getCell(cellIndex);
if (cell != null && cell.value != null) {
Recon reconToCopy = cellValueToRecon.get(cell.value);
boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
Cell newCell = new Cell(cell.value, reconToCopy);
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
cellChanges.add(cellChange);
}
}
}
return false;
}
});
} catch (Exception e) {
e.printStackTrace();
}
}
String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames);
return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.
the class RowFlagOperation method createHistoryEntry.
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
Engine engine = createEngine(project);
List<Change> changes = new ArrayList<Change>(project.rows.size());
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, createRowVisitor(project, changes));
return new HistoryEntry(historyEntryID, project, (_flagged ? "Flag" : "Unflag") + " " + changes.size() + " rows", this, new MassChange(changes, false));
}
use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.
the class GetScatterplotCommand method draw.
public void draw(OutputStream output, Project project, Engine engine, JSONObject o) throws IOException, JSONException {
double min_x = 0;
double min_y = 0;
double max_x = 0;
double max_y = 0;
int columnIndex_x = 0;
int columnIndex_y = 0;
Evaluable eval_x = null;
Evaluable eval_y = null;
int size = (o.has(ScatterplotFacet.SIZE)) ? o.getInt(ScatterplotFacet.SIZE) : 100;
double dot = (o.has(ScatterplotFacet.DOT)) ? o.getDouble(ScatterplotFacet.DOT) : 100;
int dim_x = (o.has(ScatterplotFacet.DIM_X)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_X)) : ScatterplotFacet.LIN;
int dim_y = (o.has(ScatterplotFacet.DIM_Y)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_Y)) : ScatterplotFacet.LIN;
int rotation = (o.has(ScatterplotFacet.ROTATION)) ? ScatterplotFacet.getRotation(o.getString(ScatterplotFacet.ROTATION)) : ScatterplotFacet.NO_ROTATION;
String color_str = (o.has(ScatterplotFacet.COLOR)) ? o.getString(ScatterplotFacet.COLOR) : "000000";
Color color = new Color(Integer.parseInt(color_str, 16));
String base_color_str = (o.has(ScatterplotFacet.BASE_COLOR)) ? o.getString(ScatterplotFacet.BASE_COLOR) : null;
Color base_color = base_color_str != null ? new Color(Integer.parseInt(base_color_str, 16)) : null;
String columnName_x = o.getString(ScatterplotFacet.X_COLUMN_NAME);
String expression_x = (o.has(ScatterplotFacet.X_EXPRESSION)) ? o.getString(ScatterplotFacet.X_EXPRESSION) : "value";
if (columnName_x.length() > 0) {
Column x_column = project.columnModel.getColumnByName(columnName_x);
if (x_column != null) {
columnIndex_x = x_column.getCellIndex();
}
} else {
columnIndex_x = -1;
}
try {
eval_x = MetaParser.parse(expression_x);
} catch (ParsingException e) {
logger.warn("error parsing expression", e);
}
String columnName_y = o.getString(ScatterplotFacet.Y_COLUMN_NAME);
String expression_y = (o.has(ScatterplotFacet.Y_EXPRESSION)) ? o.getString(ScatterplotFacet.Y_EXPRESSION) : "value";
if (columnName_y.length() > 0) {
Column y_column = project.columnModel.getColumnByName(columnName_y);
if (y_column != null) {
columnIndex_y = y_column.getCellIndex();
}
} else {
columnIndex_y = -1;
}
try {
eval_y = MetaParser.parse(expression_y);
} catch (ParsingException e) {
logger.warn("error parsing expression", e);
}
NumericBinIndex index_x = null;
NumericBinIndex index_y = null;
String col_x_name = o.getString(ScatterplotFacet.X_COLUMN_NAME);
Column column_x = project.columnModel.getColumnByName(col_x_name);
if (column_x != null) {
columnIndex_x = column_x.getCellIndex();
index_x = ScatterplotFacet.getBinIndex(project, column_x, eval_x, expression_x);
min_x = index_x.getMin();
max_x = index_x.getMax();
}
String col_y_name = o.getString(ScatterplotFacet.Y_COLUMN_NAME);
Column column_y = project.columnModel.getColumnByName(col_y_name);
if (column_y != null) {
columnIndex_y = column_y.getCellIndex();
index_y = ScatterplotFacet.getBinIndex(project, column_y, eval_y, expression_y);
min_y = index_y.getMin();
max_y = index_y.getMax();
}
if (index_x != null && index_y != null && index_x.isNumeric() && index_y.isNumeric()) {
ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor(columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, size, dim_x, dim_y, rotation, dot, color);
if (base_color != null) {
drawer.setColor(base_color);
FilteredRows filteredRows = engine.getAllRows();
filteredRows.accept(project, drawer);
drawer.setColor(color);
}
{
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, drawer);
}
ImageIO.write(drawer.getImage(), "png", output);
} else {
ImageIO.write(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR), "png", output);
}
}
use of com.google.refine.browsing.FilteredRows in project OpenRefine by OpenRefine.
the class kNNClusterer method computeClusters.
@Override
public void computeClusters(Engine engine) {
//VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config);
BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance, _config);
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(_project, visitor);
_clusters = visitor.getClusters();
}
Aggregations