Search in sources :

Example 1 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class CustomizableTabularExporterUtilities method exportRows.

/**
 * Streams the rows selected by {@code engine} to {@code serializer} as tabular data.
 *
 * <p>Export behavior is driven by the JSON text stored under the {@code "options"} key of
 * {@code params}. The keys read from that JSON object are:
 * <ul>
 *   <li>{@code outputColumnHeaders} (boolean, default {@code true}) - emit a header row first;</li>
 *   <li>{@code outputBlankRows} (boolean) - emit rows in which every formatted cell is null;</li>
 *   <li>{@code limit} (int, default {@code -1}) - stop after this many emitted rows when positive;</li>
 *   <li>{@code columns} (array of objects with a {@code name}) - which columns to export, in order,
 *       each with its own formatter options. When absent, every project column is exported.</li>
 * </ul>
 * If {@code params} is null, has no {@code "options"} entry, or the entry is not valid JSON, the
 * defaults are used.
 *
 * <p>A column named in {@code columns} that does not exist in the project is exported as a blank
 * (null) cell on every row instead of failing with a {@link NullPointerException}.
 *
 * @param project    project whose rows are exported
 * @param engine     engine supplying the filtered rows to visit
 * @param params     request properties; may be null
 * @param serializer sink that receives the file start, each row, and the file end
 */
public static void exportRows(final Project project, final Engine engine, Properties params, final TabularSerializer serializer) {
    String optionsString = (params != null) ? params.getProperty("options") : null;
    JSONObject optionsTemp = null;
    if (optionsString != null) {
        try {
            optionsTemp = ParsingUtilities.evaluateJsonStringToObject(optionsString);
        } catch (JSONException e) {
            // Deliberate best effort: malformed options fall back to the defaults below.
        }
    }
    final JSONObject options = optionsTemp;
    final boolean outputColumnHeaders = options == null ? true : JSONUtilities.getBoolean(options, "outputColumnHeaders", true);
    // NOTE(review): blank rows are dropped when there are no options at all, but kept when options
    // exist without an "outputBlankRows" key. Preserved as-is; confirm this asymmetry is intended.
    final boolean outputEmptyRows = options == null ? false : JSONUtilities.getBoolean(options, "outputBlankRows", true);
    final int limit = options == null ? -1 : JSONUtilities.getInt(options, "limit", -1);
    final List<String> columnNames;
    final Map<String, CellFormatter> columnNameToFormatter = new HashMap<String, CustomizableTabularExporterUtilities.CellFormatter>();
    JSONArray columnOptionArray = options == null ? null : JSONUtilities.getArray(options, "columns");
    if (columnOptionArray == null) {
        // No explicit column selection: export every project column with a default formatter.
        List<Column> columns = project.columnModel.columns;
        columnNames = new ArrayList<String>(columns.size());
        for (Column column : columns) {
            String name = column.getName();
            columnNames.add(name);
            columnNameToFormatter.put(name, new CellFormatter());
        }
    } else {
        // Explicit selection: keep the requested order; entries without a name are skipped.
        int count = columnOptionArray.length();
        columnNames = new ArrayList<String>(count);
        for (int i = 0; i < count; i++) {
            JSONObject columnOptions = JSONUtilities.getObjectElement(columnOptionArray, i);
            if (columnOptions != null) {
                String name = JSONUtilities.getString(columnOptions, "name", null);
                if (name != null) {
                    columnNames.add(name);
                    columnNameToFormatter.put(name, new CellFormatter(columnOptions));
                }
            }
        }
    }
    RowVisitor visitor = new RowVisitor() {

        // Number of rows actually handed to the serializer (header row not counted).
        int rowCount = 0;

        @Override
        public void start(Project project) {
            serializer.startFile(options);
            if (outputColumnHeaders) {
                List<CellData> cells = new ArrayList<TabularSerializer.CellData>(columnNames.size());
                for (String name : columnNames) {
                    cells.add(new CellData(name, name, name, null));
                }
                serializer.addRow(cells, true);
            }
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            List<CellData> cells = new ArrayList<TabularSerializer.CellData>(columnNames.size());
            int nonNullCount = 0;
            for (String columnName : columnNames) {
                Column column = project.columnModel.getColumnByName(columnName);
                if (column == null) {
                    // The requested column is not in the project. Emit a blank cell so the row
                    // stays aligned with the header instead of dereferencing null.
                    cells.add(null);
                    continue;
                }
                CellFormatter formatter = columnNameToFormatter.get(columnName);
                CellData cellData = formatter.format(project, column, row.getCell(column.getCellIndex()));
                cells.add(cellData);
                if (cellData != null) {
                    nonNullCount++;
                }
            }
            if (nonNullCount > 0 || outputEmptyRows) {
                serializer.addRow(cells, false);
                rowCount++;
            }
            // True once a positive limit has been reached; presumably this tells the caller to
            // stop visiting further rows - confirm against the RowVisitor contract.
            return limit > 0 && rowCount >= limit;
        }

        @Override
        public void end(Project project) {
            serializer.endFile();
        }
    };
    FilteredRows filteredRows = engine.getAllFilteredRows();
    filteredRows.accept(project, visitor);
}
Also used : HashMap(java.util.HashMap) JSONArray(org.json.JSONArray) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) CellData(com.google.refine.exporters.TabularSerializer.CellData) FilteredRows(com.google.refine.browsing.FilteredRows) Project(com.google.refine.model.Project) JSONObject(org.json.JSONObject) Column(com.google.refine.model.Column) Row(com.google.refine.model.Row) RowVisitor(com.google.refine.browsing.RowVisitor)

Example 2 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class GuessTypesOfColumnCommand method doPost.

/**
 * Responds with a JSON object describing the guessed types for one column.
 *
 * <p>Reads the {@code columnName} and {@code service} request parameters. When the column exists,
 * the response carries {@code "code": "ok"} and a {@code "types"} array whose entries hold
 * {@code id}, {@code name}, {@code score} and {@code count}. Otherwise it carries
 * {@code "code": "error"} with the message {@code "No such column"}. Any exception raised along
 * the way is passed to {@code respondException}.
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    try {
        Project project = getProject(request);
        String columnName = request.getParameter("columnName");
        String serviceUrl = request.getParameter("service");

        response.setCharacterEncoding("UTF-8");
        response.setHeader("Content-Type", "application/json");

        JSONWriter json = new JSONWriter(response.getWriter());
        json.object();

        Column target = project.columnModel.getColumnByName(columnName);
        if (target != null) {
            // Guess first, so a failure here happens before any result keys are written.
            List<TypeGroup> guesses = guessTypes(project, target, serviceUrl);
            json.key("code").value("ok");
            json.key("types").array();
            for (TypeGroup guess : guesses) {
                json.object()
                    .key("id").value(guess.id)
                    .key("name").value(guess.name)
                    .key("score").value(guess.score)
                    .key("count").value(guess.count)
                    .endObject();
            }
            json.endArray();
        } else {
            json.key("code").value("error");
            json.key("message").value("No such column");
        }

        json.endObject();
    } catch (Exception e) {
        respondException(response, e);
    }
}
Also used : JSONWriter(org.json.JSONWriter) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) ServletException(javax.servlet.ServletException) JSONException(org.json.JSONException) IOException(java.io.IOException)

Example 3 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class FacetCount method call.

/**
 * Returns how many times a choice value occurs in a facet computed over a column.
 *
 * <p>Expects exactly three arguments: the choice value to look up, the facet expression as a
 * string, and the column name as a string. The grouper for an expression is stored on the column
 * under the key {@code "nominal-bin:" + expression} and reused on later calls.
 *
 * @param bindings evaluation bindings; the {@code "project"} entry is read
 * @param args     the three arguments described above
 * @return the count reported by the grouper, or an {@code EvalError} when the arguments are
 *         malformed, the column does not exist, or the expression cannot be parsed
 */
@Override
public Object call(Properties bindings, Object[] args) {
    boolean wellFormed = args.length == 3 && args[1] instanceof String && args[2] instanceof String;
    if (!wellFormed) {
        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a choice value, an expression as a string, and a column name");
    }

    // choice value to look up
    Object choiceValue = args[0];
    String facetExpression = (String) args[1];
    String columnName = (String) args[2];

    Project project = (Project) bindings.get("project");
    Column column = project.columnModel.getColumnByName(columnName);
    if (column == null) {
        return new EvalError("No such column named " + columnName);
    }

    String cacheKey = "nominal-bin:" + facetExpression;
    ExpressionNominalValueGrouper cached = (ExpressionNominalValueGrouper) column.getPrecompute(cacheKey);
    if (cached != null) {
        return cached.getChoiceValueCountMultiple(choiceValue);
    }

    // Nothing stored yet: evaluate the expression over all rows and keep the result on the column.
    ExpressionNominalValueGrouper fresh;
    try {
        Evaluable parsed = MetaParser.parse(facetExpression);
        Engine engine = new Engine(project);
        fresh = new ExpressionNominalValueGrouper(parsed, columnName, column.getCellIndex());
        engine.getAllRows().accept(project, fresh);
        column.setPrecompute(cacheKey, fresh);
    } catch (ParsingException e) {
        return new EvalError("Error parsing facet expression " + facetExpression);
    }
    return fresh.getChoiceValueCountMultiple(choiceValue);
}
Also used : Project(com.google.refine.model.Project) Evaluable(com.google.refine.expr.Evaluable) Column(com.google.refine.model.Column) ParsingException(com.google.refine.expr.ParsingException) EvalError(com.google.refine.expr.EvalError) Engine(com.google.refine.browsing.Engine) ExpressionNominalValueGrouper(com.google.refine.browsing.util.ExpressionNominalValueGrouper)

Example 4 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class TreeImportUtilities method createColumnsFromImport.

/**
 * Appends project columns for an import column group and, recursively, for its subgroups.
 *
 * <p>The group's own columns are added first, ordered so that columns not blank on the first row
 * come first, then by descending non-blank count, then by ascending name length. Subgroups are
 * then processed in order of descending non-blank count and ascending name length. If this call
 * added more than one column, but not every column in the project, the added range is registered
 * as a column group whose key column is the first column of the range.
 *
 * @param project     project receiving the new columns
 * @param columnGroup import group whose columns and subgroups are converted
 */
public static void createColumnsFromImport(Project project, ImportColumnGroup columnGroup) {
    final Comparator<ImportColumn> columnOrder = new Comparator<ImportColumn>() {

        @Override
        public int compare(ImportColumn left, ImportColumn right) {
            if (left.blankOnFirstRow != right.blankOnFirstRow) {
                return left.blankOnFirstRow ? 1 : -1;
            }
            int byCount = right.nonBlankCount - left.nonBlankCount;
            if (byCount != 0) {
                return byCount;
            }
            return left.name.length() - right.name.length();
        }
    };
    final Comparator<ImportColumnGroup> groupOrder = new Comparator<ImportColumnGroup>() {

        @Override
        public int compare(ImportColumnGroup left, ImportColumnGroup right) {
            // TODO: We really want the column/group with the highest % of
            // records with at least one row populated, so popular optional
            // elements with multiple instances per record don't
            // outweigh mandatory elements with a single occurrence per record
            // TODO: From a human factors point of view, we probably want
            // to try to preserve the order that we found things in the XML
            // Sort by most populated first, then shortest name
            int byCount = right.nonBlankCount - left.nonBlankCount;
            if (byCount != 0) {
                return byCount;
            }
            return left.name.length() - right.name.length();
        }
    };

    int startColumnIndex = project.columnModel.columns.size();

    // This group's own columns.
    List<ImportColumn> ownColumns = new ArrayList<ImportColumn>(columnGroup.columns.values());
    Collections.sort(ownColumns, columnOrder);
    for (ImportColumn imported : ownColumns) {
        project.columnModel.columns.add(new Column(imported.cellIndex, imported.name));
    }

    // Nested groups, each of which appends its own columns.
    List<ImportColumnGroup> nestedGroups = new ArrayList<ImportColumnGroup>(columnGroup.subgroups.values());
    Collections.sort(nestedGroups, groupOrder);
    for (ImportColumnGroup nested : nestedGroups) {
        createColumnsFromImport(project, nested);
    }

    int addedCount = project.columnModel.columns.size() - startColumnIndex;
    boolean spansSeveralColumns = addedCount > 1;
    boolean spansWholeProject = addedCount >= project.columnModel.columns.size();
    if (spansSeveralColumns && !spansWholeProject) {
        // TODO: Only use "key column" if it's 100% populated?
        project.columnModel.addColumnGroup(startColumnIndex, addedCount, startColumnIndex);
    }
}
Also used : Column(com.google.refine.model.Column) ArrayList(java.util.ArrayList)

Example 5 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class ColumnReorderChange method apply.

/**
 * Replaces the project's columns with the columns named in {@code _columnNames}, in that order.
 *
 * <p>On the first application the previous columns and column groups are copied into
 * {@code _oldColumns} and {@code _oldColumnGroups}, and the new column list is resolved by name;
 * names with no matching column are skipped. On every application the project's column list is
 * replaced, its column groups are cleared, and {@code project.update()} is called. All of this
 * runs while holding the project's monitor.
 */
@Override
public void apply(Project project) {
    synchronized (project) {
        boolean firstApplication = (_newColumns == null);
        if (firstApplication) {
            _newColumns = new ArrayList<Column>();
            _oldColumns = new ArrayList<Column>(project.columnModel.columns);

            for (String requestedName : _columnNames) {
                Column match = project.columnModel.getColumnByName(requestedName);
                if (match == null) {
                    // Unknown name: leave it out of the reordered list.
                    continue;
                }
                _newColumns.add(match);
            }

            _oldColumnGroups = new ArrayList<ColumnGroup>(project.columnModel.columnGroups);
        }

        // Swap in the reordered columns.
        project.columnModel.columns.clear();
        project.columnModel.columns.addAll(_newColumns);

        // Column groups are dropped after the reorder.
        project.columnModel.columnGroups.clear();

        project.update();
    }
}
Also used : Column(com.google.refine.model.Column) ColumnGroup(com.google.refine.model.ColumnGroup)

Aggregations

Column (com.google.refine.model.Column)62 Row (com.google.refine.model.Row)25 Cell (com.google.refine.model.Cell)19 Project (com.google.refine.model.Project)16 ArrayList (java.util.ArrayList)14 JSONObject (org.json.JSONObject)14 JSONException (org.json.JSONException)11 HistoryEntry (com.google.refine.history.HistoryEntry)10 RowVisitor (com.google.refine.browsing.RowVisitor)9 Engine (com.google.refine.browsing.Engine)7 ParsingException (com.google.refine.expr.ParsingException)7 CellChange (com.google.refine.model.changes.CellChange)7 Serializable (java.io.Serializable)7 Properties (java.util.Properties)7 FilteredRows (com.google.refine.browsing.FilteredRows)6 NumericBinIndex (com.google.refine.browsing.util.NumericBinIndex)6 ColumnGroup (com.google.refine.model.ColumnGroup)6 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 Evaluable (com.google.refine.expr.Evaluable)5