Search in sources :

Example 41 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class Clusterer method initializeFromJSON.

/**
 * Configures this clusterer from its JSON description.
 *
 * Stores the project and the raw configuration object, reads the "column" entry of
 * {@code o}, and records the cell index of the column with that name. The scan does not
 * stop at the first hit, so if several columns carried the same name the last one wins.
 * When no column matches, {@code _colindex} is left untouched.
 *
 * @param project the project whose column model is searched
 * @param o       the configuration object; its "column" entry names the target column
 * @throws Exception propagated from reading the "column" entry of {@code o}
 */
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
    _project = project;
    _config = o;

    final String wantedName = o.getString("column");
    for (Column candidate : project.columnModel.columns) {
        if (!candidate.getName().equals(wantedName)) {
            continue;
        }
        _colindex = candidate.getCellIndex();
    }
}
Also used : Column(com.google.refine.model.Column)

Example 42 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class RdfTripleImporter method parseOneFile.

/**
 * Parses one RDF input stream and appends its triples to {@code project} as rows.
 *
 * The stream is parsed according to the {@code mode} field (NT, N3 or RDFXML). A "subject"
 * column is inserted at position 0 and made the key column; every distinct predicate gets
 * its own column, created on first sight. Rows are grouped per subject: an object value is
 * put into the first row of that subject whose predicate cell is still blank, otherwise a
 * new row is started for the subject. Once all triples are consumed, the rows are added to
 * the project in order of first appearance of their subject.
 *
 * The {@code metadata}, {@code job}, {@code fileSource}, {@code limit} and {@code options}
 * arguments are not read by this method.
 *
 * @param project    the project receiving the new columns and rows
 * @param input      the RDF data to parse
 * @param exceptions sink for a parsing failure (after which the method returns without
 *                   touching the project) or a {@code ModelException} raised while
 *                   building columns
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream input, int limit, JSONObject options, List<Exception> exceptions) {
    Graph graph;
    try {
        switch(mode) {
            case NT:
                graph = rdfReader.parseNTriples(input);
                break;
            case N3:
                graph = rdfReader.parseN3(input);
                break;
            case RDFXML:
                graph = rdfReader.parseRdfXml(input);
                break;
            default:
                throw new IllegalArgumentException("Unknown parsing mode");
        }
    } catch (Exception e) {
        // Nothing has been added to the project yet, so simply report and bail out.
        exceptions.add(e);
        return;
    }

    ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
    try {
        // The subject column always comes first and doubles as the key column.
        Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
        project.columnModel.addColumn(0, subjectColumn, false);
        project.columnModel.setKeyColumnIndex(0);

        // Insertion-ordered so rows come out in order of first appearance of each subject.
        Map<String, List<Row>> rowsBySubject = new LinkedHashMap<String, List<Row>>();

        for (Triple triple : triples) {
            String subject = triple.getSubject().toString();
            String predicate = triple.getPredicate().toString();
            String object = triple.getObject().toString();

            // One column per predicate, created lazily.
            Column predicateColumn = project.columnModel.getColumnByName(predicate);
            if (predicateColumn == null) {
                predicateColumn = new Column(project.columnModel.allocateNewCellIndex(), predicate);
                project.columnModel.addColumn(-1, predicateColumn, true);
            }
            int predicateCellIndex = predicateColumn.getCellIndex();

            List<Row> subjectRows = rowsBySubject.get(subject);
            if (subjectRows == null) {
                // First triple for this subject: start its row group with a row that
                // carries both the subject and the object.
                subjectRows = new ArrayList<Row>();
                rowsBySubject.put(subject, subjectRows);
                Row firstRow = new Row(project.columnModel.getMaxCellIndex() + 1);
                subjectRows.add(firstRow);
                firstRow.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
                firstRow.setCell(predicateCellIndex, new Cell(object, null));
                continue;
            }

            // Known subject: reuse the first row whose cell for this predicate is blank.
            boolean stored = false;
            for (Row candidate : subjectRows) {
                if (!ExpressionUtils.isNonBlankData(candidate.getCellValue(predicateCellIndex))) {
                    candidate.setCell(predicateCellIndex, new Cell(object, null));
                    stored = true;
                    break;
                }
            }
            // No free slot: open an additional row for the subject (skipped for a null object).
            if (!stored && object != null) {
                Row extraRow = new Row(project.columnModel.getMaxCellIndex() + 1);
                subjectRows.add(extraRow);
                extraRow.setCell(predicateCellIndex, new Cell(object, null));
            }
        }

        for (List<Row> subjectRows : rowsBySubject.values()) {
            project.rows.addAll(subjectRows);
        }
    } catch (ModelException e) {
        exceptions.add(e);
    } finally {
        // NOTE(review): this closes whatever iterator() returns now; whether that is the
        // same iterator the loop above consumed depends on ClosableIterable - confirm.
        triples.iterator().close();
    }
}
Also used : ModelException(com.google.refine.model.ModelException) ArrayList(java.util.ArrayList) ModelException(com.google.refine.model.ModelException) LinkedHashMap(java.util.LinkedHashMap) Triple(org.jrdf.graph.Triple) Graph(org.jrdf.graph.Graph) Column(com.google.refine.model.Column) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 43 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class TabularImportingParserBase method readTable.

/**
 * Reads tabular data from {@code reader} into {@code project}.
 *
 * Rows are pulled from the reader until it returns {@code null} or {@code job.canceled}
 * becomes true. Each row is handled in one of three ways, in this order:
 * <ol>
 *   <li>while {@code ignoreLines} is positive, the row is discarded;</li>
 *   <li>while {@code headerLines} is positive, the row contributes to the column names,
 *       and the columns are set up once the last header row has been consumed;</li>
 *   <li>otherwise it is a data row, stored unless it falls within the first
 *       {@code skipDataLines} counted data rows.</li>
 * </ol>
 *
 * Options read from {@code options} (with defaults): "ignoreLines" (-1), "headerLines" (1),
 * "skipDataLines" (0), "limit" (-1), "guessCellValueTypes" (false), "storeBlankRows" (true),
 * "storeBlankCellsAsNulls" (true), "includeFileSources" (false).
 *
 * The {@code metadata} argument is not read by this method.
 *
 * @param project    the project receiving columns and rows
 * @param job        the importing job; its {@code canceled} flag stops the loop
 * @param reader     source of rows, each a list of raw values or {@code Cell} objects
 * @param fileSource value written into the file-name column when "includeFileSources" is set
 * @param limit      row limit; when positive it is combined with the "limit" option by
 *                   taking the smaller positive value. Reading stops once
 *                   {@code project.rows} has reached that size
 * @param options    import options, see above
 * @param exceptions sink for an {@code IOException} raised while reading
 */
public static void readTable(Project project, ProjectMetadata metadata, ImportingJob job, TableDataReader reader, String fileSource, int limit, JSONObject options, List<Exception> exceptions) {
    int ignoreLines = JSONUtilities.getInt(options, "ignoreLines", -1);
    int headerLines = JSONUtilities.getInt(options, "headerLines", 1);
    int skipDataLines = JSONUtilities.getInt(options, "skipDataLines", 0);
    int limit2 = JSONUtilities.getInt(options, "limit", -1);
    // Effective limit: the smaller of the two when both are positive, else whichever is.
    if (limit > 0) {
        if (limit2 > 0) {
            limit2 = Math.min(limit, limit2);
        } else {
            limit2 = limit;
        }
    }
    boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", false);
    boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
    boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
    boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
    int filenameColumnIndex = -1;
    if (includeFileSources) {
        filenameColumnIndex = addFilenameColumn(project);
    }
    List<String> columnNames = new ArrayList<String>();
    // Captured before headerLines is counted down below.
    boolean hasOurOwnColumnNames = headerLines > 0;
    List<Object> cells = null;
    int rowsWithData = 0;
    try {
        while (!job.canceled && (cells = reader.getNextRowOfCells()) != null) {
            if (ignoreLines > 0) {
                ignoreLines--;
                continue;
            }
            if (headerLines > 0) {
                // header lines
                for (int c = 0; c < cells.size(); c++) {
                    Object cell = cells.get(c);
                    String columnName;
                    if (cell == null) {
                        // add column even if cell is blank
                        columnName = "";
                    } else if (cell instanceof Cell) {
                        // A Cell may carry a null value; treat it like a blank header cell
                        // instead of failing with a NullPointerException.
                        Object headerValue = ((Cell) cell).value;
                        columnName = headerValue == null ? "" : headerValue.toString().trim();
                    } else {
                        columnName = cell.toString().trim();
                    }
                    ImporterUtilities.appendColumnName(columnNames, c, columnName);
                }
                headerLines--;
                if (headerLines == 0) {
                    ImporterUtilities.setupColumns(project, columnNames);
                }
            } else {
                // data lines
                Row row = new Row(columnNames.size());
                // Count this row towards skipDataLines: always when blank rows are kept,
                // otherwise only when the reader returned at least one cell.
                if (storeBlankRows) {
                    rowsWithData++;
                } else if (cells.size() > 0) {
                    rowsWithData++;
                }
                if (skipDataLines <= 0 || rowsWithData > skipDataLines) {
                    boolean rowHasData = false;
                    for (int c = 0; c < cells.size(); c++) {
                        Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c, hasOurOwnColumnNames);
                        Object value = cells.get(c);
                        if (value instanceof Cell) {
                            // Ready-made cells are stored as they are.
                            row.setCell(column.getCellIndex(), (Cell) value);
                            rowHasData = true;
                        } else if (ExpressionUtils.isNonBlankData(value)) {
                            Serializable storedValue;
                            if (value instanceof String) {
                                storedValue = guessCellValueTypes ? ImporterUtilities.parseCellValue((String) value) : (String) value;
                            } else {
                                storedValue = ExpressionUtils.wrapStorable(value);
                            }
                            row.setCell(column.getCellIndex(), new Cell(storedValue, null));
                            rowHasData = true;
                        } else if (!storeBlankCellsAsNulls) {
                            // Blank value kept as an empty-string cell.
                            row.setCell(column.getCellIndex(), new Cell("", null));
                        } else {
                            row.setCell(column.getCellIndex(), null);
                        }
                    }
                    if (rowHasData || storeBlankRows) {
                        if (includeFileSources && filenameColumnIndex >= 0) {
                            row.setCell(filenameColumnIndex, new Cell(fileSource, null));
                        }
                        project.rows.add(row);
                    }
                    if (limit2 > 0 && project.rows.size() >= limit2) {
                        break;
                    }
                }
            }
        }
    } catch (IOException e) {
        exceptions.add(e);
    }
}
Also used : Serializable(java.io.Serializable) Column(com.google.refine.model.Column) ArrayList(java.util.ArrayList) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) IOException(java.io.IOException) Cell(com.google.refine.model.Cell)

Example 44 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class CellTuple method getField.

/**
 * Looks up the cell of this tuple's row that belongs to the column called {@code name}.
 *
 * @param name     the column name to resolve
 * @param bindings not read by this method
 * @return a {@code WrappedCell} for the cell, or {@code null} when the project has no such
 *         column or the row has no cell at that column's index
 */
@Override
public Object getField(String name, Properties bindings) {
    final Column namedColumn = project.columnModel.getColumnByName(name);
    if (namedColumn == null) {
        return null;
    }
    final Cell found = row.getCell(namedColumn.getCellIndex());
    return found == null ? null : new WrappedCell(project, name, found);
}
Also used : Column(com.google.refine.model.Column) Cell(com.google.refine.model.Cell)

Example 45 with Column

use of com.google.refine.model.Column in project OpenRefine by OpenRefine.

the class MultiValuedCellJoinOperation method createHistoryEntry.

/**
 * Builds the history entry that joins multi-valued cells of {@code _columnName} record by
 * record.
 *
 * A record starts at a row whose key cell is non-blank and extends over the following rows
 * whose key cell is blank. For records spanning more than one row, the non-blank values of
 * the target column are concatenated with {@code _separator} into the first row, the target
 * cell of the remaining rows is cleared, and rows that end up empty are dropped. Rows that
 * precede the first keyed row, and single-row records, are copied unchanged.
 *
 * @param project        the project whose rows are transformed
 * @param historyEntryID the id given to the resulting history entry
 * @return a history entry wrapping a {@code MassRowChange} with the rebuilt row list
 * @throws Exception if either the target column or the key column does not exist
 */
@Override
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
    Column targetColumn = project.columnModel.getColumnByName(_columnName);
    if (targetColumn == null) {
        throw new Exception("No column named " + _columnName);
    }
    final int targetIndex = targetColumn.getCellIndex();

    Column recordKeyColumn = project.columnModel.getColumnByName(_keyColumnName);
    if (recordKeyColumn == null) {
        throw new Exception("No key column named " + _keyColumnName);
    }
    final int keyIndex = recordKeyColumn.getCellIndex();

    final List<Row> rebuiltRows = new ArrayList<Row>();
    final int totalRows = project.rows.size();

    int start = 0;
    while (start < totalRows) {
        Row headRow = project.rows.get(start);

        // A row with a blank key that is not part of a preceding record: copy as is.
        if (headRow.isCellBlank(keyIndex)) {
            rebuiltRows.add(headRow.dup());
            start++;
            continue;
        }

        // Extend the record over all directly following rows with a blank key.
        int end = start + 1;
        while (end < totalRows && project.rows.get(end).isCellBlank(keyIndex)) {
            end++;
        }

        if (end - start == 1) {
            // Single-row record: nothing to join.
            rebuiltRows.add(headRow.dup());
        } else {
            StringBuilder joined = new StringBuilder();
            for (int i = start; i < end; i++) {
                Object value = project.rows.get(i).getCellValue(targetIndex);
                if (!ExpressionUtils.isNonBlankData(value)) {
                    continue;
                }
                if (joined.length() > 0) {
                    joined.append(_separator);
                }
                joined.append(value.toString());
            }

            for (int i = start; i < end; i++) {
                Row copy = project.rows.get(i).dup();
                // Only the first row of the record keeps the joined value.
                copy.setCell(targetIndex, i == start ? new Cell(joined.toString(), null) : null);
                if (!copy.isEmpty()) {
                    rebuiltRows.add(copy);
                }
            }
        }

        // Continue with the first row after this record.
        start = end;
    }

    return new HistoryEntry(historyEntryID, project, getBriefDescription(null), this, new MassRowChange(rebuiltRows));
}
Also used : MassRowChange(com.google.refine.model.changes.MassRowChange) Column(com.google.refine.model.Column) ArrayList(java.util.ArrayList) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell) JSONException(org.json.JSONException)

Aggregations

Column (com.google.refine.model.Column): 62, Row (com.google.refine.model.Row): 25, Cell (com.google.refine.model.Cell): 19, Project (com.google.refine.model.Project): 16, ArrayList (java.util.ArrayList): 14, JSONObject (org.json.JSONObject): 14, JSONException (org.json.JSONException): 11, HistoryEntry (com.google.refine.history.HistoryEntry): 10, RowVisitor (com.google.refine.browsing.RowVisitor): 9, Engine (com.google.refine.browsing.Engine): 7, ParsingException (com.google.refine.expr.ParsingException): 7, CellChange (com.google.refine.model.changes.CellChange): 7, Serializable (java.io.Serializable): 7, Properties (java.util.Properties): 7, FilteredRows (com.google.refine.browsing.FilteredRows): 6, NumericBinIndex (com.google.refine.browsing.util.NumericBinIndex): 6, ColumnGroup (com.google.refine.model.ColumnGroup): 6, IOException (java.io.IOException): 6, HashMap (java.util.HashMap): 6, Evaluable (com.google.refine.expr.Evaluable): 5