Search in sources :

Example 1 with Graph

Use of org.jrdf.graph.Graph in project OpenRefine by OpenRefine.

The method parseOneFile of the class RdfTripleImporter.

/**
 * Parses one RDF input stream into a graph and loads its triples into the
 * project as rows.
 *
 * <p>The parser is selected by the {@code mode} field (N-Triples, N3 or
 * RDF/XML). A "subject" column is inserted at position 0 and made the key
 * column; one additional column is created per distinct predicate. Every
 * subject owns a list of rows: the first row carries the subject cell, and a
 * further row is added for that subject whenever all of its existing rows
 * already hold a non-blank value in the predicate's column. Rows are appended
 * to {@code project.rows} grouped by subject, in the order in which subjects
 * were first encountered.
 *
 * <p>Failures are never thrown to the caller; they are appended to
 * {@code exceptions}. If a {@link ModelException} occurs while building the
 * columns or rows, no rows are added to the project.
 *
 * @param project    project whose column model and rows are populated
 * @param metadata   not used by this method
 * @param job        not used by this method
 * @param fileSource not used by this method
 * @param input      stream containing the RDF data to parse
 * @param limit      not used by this method
 * @param options    not used by this method
 * @param exceptions collector that receives any parsing or model exception
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream input, int limit, JSONObject options, List<Exception> exceptions) {
    Graph graph;
    try {
        switch(mode) {
            case NT:
                graph = rdfReader.parseNTriples(input);
                break;
            case N3:
                graph = rdfReader.parseN3(input);
                break;
            case RDFXML:
                graph = rdfReader.parseRdfXml(input);
                break;
            default:
                throw new IllegalArgumentException("Unknown parsing mode");
        }
    } catch (Exception e) {
        // Importer boundary: any parser failure is reported through the
        // collector instead of being propagated.
        exceptions.add(e);
        return;
    }
    ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
    // Take the iterator exactly once so that the instance closed in the
    // finally block is the very one that was iterated. Calling
    // triples.iterator() again there would close a different iterator (unless
    // the implementation happens to return the same instance) and leave the
    // consumed one open. Fully qualified because this file's import list is
    // not guaranteed to contain ClosableIterator.
    org.jrdf.util.ClosableIterator<Triple> tripleIterator = triples.iterator();
    try {
        // LinkedHashMap keeps subjects in first-seen order for the final append.
        Map<String, List<Row>> subjectToRows = new LinkedHashMap<String, List<Row>>();
        Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
        project.columnModel.addColumn(0, subjectColumn, false);
        project.columnModel.setKeyColumnIndex(0);
        while (tripleIterator.hasNext()) {
            Triple triple = tripleIterator.next();
            String subject = triple.getSubject().toString();
            String predicate = triple.getPredicate().toString();
            String object = triple.getObject().toString();

            // One column per distinct predicate, created on first sight.
            Column column = project.columnModel.getColumnByName(predicate);
            if (column == null) {
                column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
                // NOTE(review): index -1 presumably means "append" - confirm
                // against ColumnModel.addColumn.
                project.columnModel.addColumn(-1, column, true);
            }
            int cellIndex = column.getCellIndex();

            List<Row> rows = subjectToRows.get(subject);
            if (rows == null) {
                // First triple for this subject: start its row group with a
                // row holding both the subject and this object.
                rows = new ArrayList<Row>();
                subjectToRows.put(subject, rows);
                Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
                rows.add(row);
                row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
                row.setCell(cellIndex, new Cell(object, null));
            } else {
                // Reuse the first row of this subject whose cell for the
                // predicate is still reported as blank.
                boolean placed = false;
                for (Row row : rows) {
                    if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
                        row.setCell(cellIndex, new Cell(object, null));
                        placed = true;
                        break;
                    }
                }
                if (!placed) {
                    // Every existing row already has a value for this
                    // predicate: add a continuation row without a subject cell.
                    Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
                    rows.add(row);
                    row.setCell(cellIndex, new Cell(object, null));
                }
            }
        }
        for (List<Row> rows : subjectToRows.values()) {
            project.rows.addAll(rows);
        }
    } catch (ModelException e) {
        exceptions.add(e);
    } finally {
        tripleIterator.close();
    }
}
Also used: ModelException(com.google.refine.model.ModelException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Triple(org.jrdf.graph.Triple) Graph(org.jrdf.graph.Graph) Column(com.google.refine.model.Column) List(java.util.List) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Aggregations

Cell (com.google.refine.model.Cell)1 Column (com.google.refine.model.Column)1 ModelException (com.google.refine.model.ModelException)1 Row (com.google.refine.model.Row)1 ArrayList (java.util.ArrayList)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Graph (org.jrdf.graph.Graph)1 Triple (org.jrdf.graph.Triple)1