Search in sources :

Example 1 with ModelException

use of com.google.refine.model.ModelException in project OpenRefine by OpenRefine.

the class RdfTripleImporter method parseOneFile.

/**
 * Parses an RDF document from {@code input} and lays its triples out as rows of {@code project}.
 *
 * <p>The stream is read into an in-memory Jena model using the syntax selected by the
 * {@code mode} field. A "subject" column is inserted at position 0 and made the key column;
 * every distinct predicate gets its own column. For each triple the object is written into the
 * first row of its subject whose cell for that predicate is still blank, or into a new row for
 * that subject when no such row exists. Rows are appended to {@code project.rows} grouped by
 * subject, in the order subjects were first encountered.
 *
 * <p>{@code metadata}, {@code job}, {@code fileSource}, {@code limit} and {@code options} are
 * not consulted by this method.
 *
 * @param project    project whose column model and rows are populated
 * @param input      stream holding the serialized RDF
 * @param exceptions sink for failures; a parse error or a {@link ModelException} is appended
 *                   here instead of being thrown
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream input, int limit, ObjectNode options, List<Exception> exceptions) {
    // create an empty model
    Model model = ModelFactory.createDefaultModel();
    try {
        switch(mode) {
            case NT:
                // TODO: The standard lang name is "N-TRIPLE"
                // we may need to switch if we change packagings
                model.read(input, null, "NT");
                break;
            case N3:
                model.read(input, null, "N3");
                break;
            case TTL:
                model.read(input, null, "TTL");
                break;
            case JSONLD:
                // TODO: The standard lang name is "JSONLD"
                model.read(input, null, "JSON-LD");
                break;
            case RDFXML:
                model.read(input, null);
                break;
            default:
                throw new IllegalArgumentException("Unknown parsing mode");
        }
    } catch (Exception e) {
        // Any failure while reading is reported to the caller and nothing is imported.
        exceptions.add(e);
        return;
    }
    StmtIterator triples = model.listStatements();
    try {
        // Insertion-ordered so rows come out grouped by subject in first-seen order.
        Map<String, List<Row>> subjectToRows = new LinkedHashMap<String, List<Row>>();
        Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
        project.columnModel.addColumn(0, subjectColumn, false);
        project.columnModel.setKeyColumnIndex(0);
        while (triples.hasNext()) {
            Statement triple = triples.nextStatement();
            String subject = triple.getSubject().toString();
            String predicate = triple.getPredicate().toString();
            String object = triple.getObject().toString();
            // One column per distinct predicate, created on first sight.
            Column column = project.columnModel.getColumnByName(predicate);
            if (column == null) {
                column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
                project.columnModel.addColumn(-1, column, true);
            }
            int cellIndex = column.getCellIndex();
            if (subjectToRows.containsKey(subject)) {
                List<Row> rows = subjectToRows.get(subject);
                // Reuse the first row of this subject that has no value for the predicate yet.
                for (Row row : rows) {
                    if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
                        row.setCell(cellIndex, new Cell(object, null));
                        // null marks the object as already placed
                        object = null;
                        break;
                    }
                }
                // Every existing row already holds a value for this predicate: add another row.
                if (object != null) {
                    Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
                    rows.add(row);
                    row.setCell(cellIndex, new Cell(object, null));
                }
            } else {
                // First triple for this subject: its first row also carries the subject cell.
                List<Row> rows = new ArrayList<Row>();
                subjectToRows.put(subject, rows);
                Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
                rows.add(row);
                row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
                row.setCell(cellIndex, new Cell(object, null));
            }
        }
        for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
            project.rows.addAll(entry.getValue());
        }
    } catch (ModelException e) {
        exceptions.add(e);
    } finally {
        // A ModelException can abandon the iterator before it is exhausted; closing it here
        // releases it on every path (closing an exhausted iterator is harmless).
        triples.close();
    }
}
Also used : StmtIterator(org.apache.jena.rdf.model.StmtIterator) ModelException(com.google.refine.model.ModelException) Statement(org.apache.jena.rdf.model.Statement) ArrayList(java.util.ArrayList) ModelException(com.google.refine.model.ModelException) LinkedHashMap(java.util.LinkedHashMap) Column(com.google.refine.model.Column) Model(org.apache.jena.rdf.model.Model) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 2 with ModelException

use of com.google.refine.model.ModelException in project OpenRefine by OpenRefine.

the class ImporterUtilities method setupColumns.

/**
 * Normalizes the given header names in place and adds a column to {@code project} for each
 * resulting name that the column model does not already contain.
 *
 * <p>Normalization per entry: the name is trimmed; a blank name becomes {@code "Column"}; a
 * name wrapped in double quotes has the quotes removed and is trimmed again. A name already
 * seen in this call gets a running number appended (2, 3, ...), separated by a space when the
 * name itself contains a space. The normalized name is written back into {@code columnNames}.
 *
 * @param project     project whose column model receives the new columns
 * @param columnNames header names; modified in place to hold the normalized names
 */
public static void setupColumns(Project project, List<String> columnNames) {
    // Maps a normalized name to the suffix to use for its next duplicate (starts at 2).
    Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
    for (int c = 0; c < columnNames.size(); c++) {
        String cell = columnNames.get(c).trim();
        if (cell.isEmpty()) {
            cell = "Column";
        } else if (cell.length() >= 2 && cell.startsWith("\"") && cell.endsWith("\"")) {
            // FIXME: is trimming quotation marks appropriate?
            // The length check matters: a lone quotation mark both starts and ends with '"',
            // and substring(1, 0) on it would throw StringIndexOutOfBoundsException.
            cell = cell.substring(1, cell.length() - 1).trim();
        }
        if (nameToIndex.containsKey(cell)) {
            int index = nameToIndex.get(cell);
            nameToIndex.put(cell, index + 1);
            cell = cell.contains(" ") ? (cell + " " + index) : (cell + index);
        } else {
            nameToIndex.put(cell, 2);
        }
        columnNames.set(c, cell);
        if (project.columnModel.getColumnByName(cell) == null) {
            Column column = new Column(project.columnModel.allocateNewCellIndex(), cell);
            try {
                project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
            } catch (ModelException ignored) {
                // Ignore: shouldn't get in here since we just checked for duplicate names.
            }
        }
    }
}
Also used : ModelException(com.google.refine.model.ModelException) HashMap(java.util.HashMap) Column(com.google.refine.model.Column)

Example 3 with ModelException

use of com.google.refine.model.ModelException in project OpenRefine by OpenRefine.

the class RdfTripleImporter method parseOneFile.

/**
 * Parses RDF from {@code input} with the reader matching the {@code mode} field and turns the
 * resulting triples into rows of {@code project}.
 *
 * <p>A "subject" column is inserted at position 0 and set as the key column; each distinct
 * predicate gets a column of its own. An object value is stored in the first row of its subject
 * that has a blank cell for the predicate, otherwise in a freshly added row for that subject.
 * Rows are finally appended to {@code project.rows}, subject by subject, in first-seen order.
 *
 * <p>{@code metadata}, {@code job}, {@code fileSource}, {@code limit} and {@code options} are
 * not read by this method.
 *
 * @param project    project whose column model and rows are filled
 * @param input      stream holding the serialized RDF
 * @param exceptions receives any parse failure or {@link ModelException} instead of it being thrown
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream input, int limit, JSONObject options, List<Exception> exceptions) {
    // Step 1: parse the whole stream; on failure record the error and import nothing.
    Graph graph;
    try {
        switch(mode) {
            case NT:
                graph = rdfReader.parseNTriples(input);
                break;
            case N3:
                graph = rdfReader.parseN3(input);
                break;
            case RDFXML:
                graph = rdfReader.parseRdfXml(input);
                break;
            default:
                throw new IllegalArgumentException("Unknown parsing mode");
        }
    } catch (Exception parseFailure) {
        exceptions.add(parseFailure);
        return;
    }

    // Step 2: walk every triple in the graph and place it into the grid.
    ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
    try {
        Column keyColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
        project.columnModel.addColumn(0, keyColumn, false);
        project.columnModel.setKeyColumnIndex(0);

        // Insertion-ordered, so subjects keep the order in which they were first seen.
        Map<String, List<Row>> rowsBySubject = new LinkedHashMap<String, List<Row>>();
        for (Triple statement : triples) {
            String subjectText = statement.getSubject().toString();
            String predicateText = statement.getPredicate().toString();
            String objectText = statement.getObject().toString();

            // Look up the predicate's column, creating it the first time the predicate appears.
            Column predicateColumn = project.columnModel.getColumnByName(predicateText);
            if (predicateColumn == null) {
                predicateColumn = new Column(project.columnModel.allocateNewCellIndex(), predicateText);
                project.columnModel.addColumn(-1, predicateColumn, true);
            }
            int targetIndex = predicateColumn.getCellIndex();

            List<Row> subjectRows = rowsBySubject.get(subjectText);
            if (subjectRows == null) {
                // New subject: start its row list with a row carrying the subject and this value.
                subjectRows = new ArrayList<Row>();
                rowsBySubject.put(subjectText, subjectRows);
                Row firstRow = new Row(project.columnModel.getMaxCellIndex() + 1);
                subjectRows.add(firstRow);
                firstRow.setCell(keyColumn.getCellIndex(), new Cell(subjectText, null));
                firstRow.setCell(targetIndex, new Cell(objectText, null));
                continue;
            }

            // Known subject: find the first of its rows with nothing stored for this predicate.
            Row vacantRow = null;
            for (Row candidate : subjectRows) {
                if (!ExpressionUtils.isNonBlankData(candidate.getCellValue(targetIndex))) {
                    vacantRow = candidate;
                    break;
                }
            }
            if (vacantRow != null) {
                vacantRow.setCell(targetIndex, new Cell(objectText, null));
            } else if (objectText != null) {
                // All rows of this subject are taken for the predicate: append another one.
                Row extraRow = new Row(project.columnModel.getMaxCellIndex() + 1);
                subjectRows.add(extraRow);
                extraRow.setCell(targetIndex, new Cell(objectText, null));
            }
        }

        // Step 3: hand the collected rows over to the project, one subject after another.
        for (List<Row> subjectRows : rowsBySubject.values()) {
            project.rows.addAll(subjectRows);
        }
    } catch (ModelException modelFailure) {
        exceptions.add(modelFailure);
    } finally {
        triples.iterator().close();
    }
}
Also used : ModelException(com.google.refine.model.ModelException) ArrayList(java.util.ArrayList) ModelException(com.google.refine.model.ModelException) LinkedHashMap(java.util.LinkedHashMap) Triple(org.jrdf.graph.Triple) Graph(org.jrdf.graph.Graph) Column(com.google.refine.model.Column) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.google.refine.model.Row) Cell(com.google.refine.model.Cell)

Example 4 with ModelException

use of com.google.refine.model.ModelException in project OpenRefine by OpenRefine.

the class TsvExporterTests method CreateColumns.

// helper methods
/**
 * Adds {@code noOfColumns} columns named "column0", "column1", ... to the test project's
 * column model, using the loop counter as both the insert position and the cell index.
 * Fails the test if the column model rejects a column with a {@link ModelException}.
 *
 * @param noOfColumns number of columns to create
 */
protected void CreateColumns(int noOfColumns) {
    int position = 0;
    while (position < noOfColumns) {
        Column newColumn = new Column(position, "column" + position);
        try {
            project.columnModel.addColumn(position, newColumn, true);
            // Reads the column back at the same position; the returned index is not used.
            project.columnModel.columns.get(position).getCellIndex();
        } catch (ModelException rejected) {
            Assert.fail("Could not create column");
        }
        position++;
    }
}
Also used : ModelException(com.google.refine.model.ModelException) Column(com.google.refine.model.Column)

Example 5 with ModelException

use of com.google.refine.model.ModelException in project OpenRefine by OpenRefine.

the class TsvExporterTests method CreateColumns.

// helper methods
/**
 * Populates the test project's column model with {@code noOfColumns} columns named
 * "column0", "column1", and so on; the counter serves as insert position and cell index.
 * A {@link ModelException} from the column model fails the test.
 *
 * @param noOfColumns how many columns to add
 */
protected void CreateColumns(int noOfColumns) {
    int columnNumber = 0;
    while (columnNumber < noOfColumns) {
        Column created = new Column(columnNumber, "column" + columnNumber);
        try {
            project.columnModel.addColumn(columnNumber, created, true);
            // Fetches the column just added at this position; the result is discarded.
            project.columnModel.columns.get(columnNumber).getCellIndex();
        } catch (ModelException addFailure) {
            Assert.fail("Could not create column");
        }
        columnNumber++;
    }
}
Also used : ModelException(com.google.refine.model.ModelException) Column(com.google.refine.model.Column)

Aggregations

Column (com.google.refine.model.Column)6 ModelException (com.google.refine.model.ModelException)6 Row (com.google.refine.model.Row)3 Cell (com.google.refine.model.Cell)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 LinkedHashMap (java.util.LinkedHashMap)2 List (java.util.List)2 Recon (com.google.refine.model.Recon)1 ReconStats (com.google.refine.model.ReconStats)1 ReconType (com.google.refine.model.ReconType)1 DataExtensionReconConfig (com.google.refine.model.recon.DataExtensionReconConfig)1 DataExtension (com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension)1 Model (org.apache.jena.rdf.model.Model)1 Statement (org.apache.jena.rdf.model.Statement)1 StmtIterator (org.apache.jena.rdf.model.StmtIterator)1 Graph (org.jrdf.graph.Graph)1 Triple (org.jrdf.graph.Triple)1