Search in sources :

Example 1 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class OdsImporter method extractCell.

/**
 * Converts an ODF table cell into a {@link Cell}, optionally attaching a {@link Recon}
 * derived from the cell's hyperlink.
 *
 * <p>The hyperlink lookup is still a stub (see the TODOs): the link is hard-coded to the
 * empty string, so as written the URL check never passes and the returned cell always
 * carries a {@code null} recon.
 *
 * @param cell     the ODF cell to read
 * @param reconMap recons already created during this import, keyed by the id parsed from
 *                 the hyperlink; a newly created recon is stored in it
 * @return a new {@link Cell} holding the extracted value, or {@code null} when the
 *         single-argument {@code extractCell(cell)} yields {@code null}
 */
protected static Cell extractCell(OdfTableCell cell, Map<String, Recon> reconMap) {
    Serializable cellValue = extractCell(cell);
    if (cellValue == null) {
        return null;
    }

    Recon cellRecon = null;
    // TODO: cell.getHyperlink();
    String hyperlink = "";
    if (hyperlink != null) {
        // TODO: hyperlink.getAddress();
        String address = hyperlink;
        final String marker = "freebase.com/view";
        boolean isWebLink = address.startsWith("http://") || address.startsWith("https://");
        int markerPos = isWebLink ? address.indexOf(marker) : -1;
        if (markerPos > 0) {
            // Everything after the marker is the id, minus any query string or fragment.
            String topicId = address.substring(markerPos + marker.length());
            for (char delimiter : new char[] { '?', '#' }) {
                int cut = topicId.indexOf(delimiter);
                if (cut > 0) {
                    topicId = topicId.substring(0, cut);
                }
            }

            if (!reconMap.containsKey(topicId)) {
                // First time this id is seen: build a recon matched to a single candidate.
                ReconCandidate candidate = new ReconCandidate(topicId, cellValue.toString(), new String[0], 100);
                cellRecon = new Recon(0, null, null);
                cellRecon.service = "import";
                cellRecon.match = candidate;
                cellRecon.matchRank = 0;
                cellRecon.judgment = Judgment.Matched;
                cellRecon.judgmentAction = "auto";
                cellRecon.judgmentBatchSize = 1;
                cellRecon.addCandidate(candidate);
                reconMap.put(topicId, cellRecon);
            } else {
                // Id already seen: share the existing recon and count one more cell on it.
                cellRecon = reconMap.get(topicId);
                cellRecon.judgmentBatchSize += 1;
            }
        }
    }
    return new Cell(cellValue, cellRecon);
}
Also used : Serializable(java.io.Serializable) Recon(com.google.refine.model.Recon) OdfTableCell(org.odftoolkit.odfdom.doc.table.OdfTableCell) Cell(com.google.refine.model.Cell) ReconCandidate(com.google.refine.model.ReconCandidate)

Example 2 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class MassReconChange method loadRecons.

/**
 * Reads a fixed number of serialized recons from {@code reader}, one per line, and stores
 * each in {@code recons} keyed by its {@code id}.
 *
 * @param reader      source of the serialized recons, one per line
 * @param pool        passed through to {@code Recon.loadStreaming}
 * @param recons      destination map; an existing entry with the same id is replaced
 * @param countString decimal number of recon lines to read
 * @throws Exception if {@code countString} is not a valid integer, if the input ends
 *                   before {@code countString} lines have been read, or if reading or
 *                   deserializing a line fails
 */
protected static void loadRecons(LineNumberReader reader, Pool pool, Map<Long, Recon> recons, String countString) throws Exception {
    int count = Integer.parseInt(countString);
    for (int i = 0; i < count; i++) {
        String line = reader.readLine();
        if (line == null) {
            // readLine() signals end of stream with null; fail with a clear message instead
            // of handing null to the deserializer.
            throw new java.io.EOFException("Unexpected end of input at line " + reader.getLineNumber()
                    + ": expected " + count + " recons but found only " + i);
        }
        Recon recon = Recon.loadStreaming(line, pool);
        recons.put(recon.id, recon);
    }
}
Also used : Recon(com.google.refine.model.Recon)

Example 3 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class StandardReconConfig method batchRecon.

/**
 * Sends all {@code jobs} to the reconciliation service at {@code service} in a single
 * HTTP request and turns the response into one list entry per job.
 *
 * <p>The jobs are packed into a JSON object of the form {@code {"q0": ..., "q1": ...}}
 * and sent as the form-encoded {@code queries} parameter. Any failure (HTTP status of 400
 * or above, I/O error, malformed response) is logged rather than thrown.
 *
 * @param jobs           the jobs to reconcile; each must be a {@code StandardReconJob}
 * @param historyEntryID id handed to every {@link Recon} created for this batch
 * @return a list with exactly {@code jobs.size()} entries, in job order. An entry is
 *         {@code null} when the service answered but gave no {@code result} for that job;
 *         jobs that were never processed because the request failed get a fresh
 *         {@link Recon} with no candidates added here
 */
@Override
public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
    List<Recon> recons = new ArrayList<Recon>(jobs.size());

    // Build the query payload: {"q0": <job code>, "q1": <job code>, ...}
    StringWriter stringWriter = new StringWriter();
    stringWriter.write("{");
    for (int i = 0; i < jobs.size(); i++) {
        StandardReconJob job = (StandardReconJob) jobs.get(i);
        if (i > 0) {
            stringWriter.write(",");
        }
        stringWriter.write("\"q" + i + "\":");
        stringWriter.write(job.code);
    }
    stringWriter.write("}");
    String queriesString = stringWriter.toString();

    try {
        URL url = new URL(service);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        {
            connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
            connection.setConnectTimeout(30000);
            connection.setDoOutput(true);
            DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
            try {
                String body = "queries=" + ParsingUtilities.encode(queriesString);
                dos.writeBytes(body);
            } finally {
                dos.flush();
                dos.close();
            }
            connection.connect();
        }

        int responseCode = connection.getResponseCode();
        if (responseCode >= 400) {
            // The error stream may be null when the server sent no body. Read it
            // separately so the null check cannot be absorbed into the string
            // concatenation, and close it once consumed.
            InputStream is = connection.getErrorStream();
            String message = "";
            if (is != null) {
                try {
                    message = ParsingUtilities.inputStreamToString(is);
                } finally {
                    is.close();
                }
            }
            logger.error("Failed  - code:" + Integer.toString(responseCode) + " message: " + message);
        } else {
            InputStream is = connection.getInputStream();
            try {
                String s = ParsingUtilities.inputStreamToString(is);
                JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
                for (int i = 0; i < jobs.size(); i++) {
                    StandardReconJob job = (StandardReconJob) jobs.get(i);
                    Recon recon = null;
                    String text = job.text;
                    String key = "q" + i;
                    if (o.has(key)) {
                        JSONObject o2 = o.getJSONObject(key);
                        if (o2.has("result")) {
                            JSONArray results = o2.getJSONArray("result");
                            recon = createReconServiceResults(text, results, historyEntryID);
                        } else {
                            logger.warn("Service error for text: " + text + "\n  Job code: " + job.code + "\n  Response: " + o2.toString());
                        }
                    } else {
                        logger.warn("Service error for text: " + text + "\n  Job code: " + job.code);
                    }
                    if (recon != null) {
                        recon.service = service;
                    }
                    // A null entry is added on purpose so list positions stay aligned with jobs.
                    recons.add(recon);
                }
            } finally {
                is.close();
            }
        }
    } catch (Exception e) {
        logger.error("Failed to batch recon with load:\n" + queriesString, e);
    }

    // Pad with fresh recons for every job the response handling did not reach.
    while (recons.size() < jobs.size()) {
        Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
        recon.service = service;
        recon.identifierSpace = identifierSpace;
        recon.schemaSpace = schemaSpace;
        recons.add(recon);
    }
    return recons;
}
Also used : DataOutputStream(java.io.DataOutputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) URL(java.net.URL) JSONException(org.json.JSONException) HttpURLConnection(java.net.HttpURLConnection) StringWriter(java.io.StringWriter) JSONObject(org.json.JSONObject) Recon(com.google.refine.model.Recon)

Example 4 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class StandardReconConfig method createNewRecon.

/**
 * Builds a {@link Recon} for the given history entry from this configuration's
 * {@code identifierSpace} and {@code schemaSpace}, and stamps it with this
 * configuration's {@code service}.
 *
 * @param historyEntryID id passed to the {@link Recon} constructor
 * @return the newly created recon
 */
@Override
public Recon createNewRecon(long historyEntryID) {
    final Recon fresh = new Recon(historyEntryID, identifierSpace, schemaSpace);
    fresh.service = service;
    return fresh;
}
Also used : Recon(com.google.refine.model.Recon)

Example 5 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class ReconCopyAcrossColumnsOperation method createHistoryEntry.

/**
 * Builds the history entry that copies recon data from the source column to cells with
 * the same value in the target columns.
 *
 * <p>Works in two passes over the filtered rows: the first collects, per cell value, a
 * recon from the source column whose judgment is one of the configured judgments (the
 * last such row wins); the second records a {@link CellChange} for every target-column
 * cell whose value has a collected recon and that is either unjudged or allowed to be
 * overwritten by {@code _applyToJudgedCells}.
 *
 * @param project        the project to operate on
 * @param historyEntryID id for the resulting history entry
 * @return a history entry wrapping a {@link MassChange} of the collected cell changes;
 *         the change list is empty when the source column or every target column is
 *         missing
 * @throws Exception declared by the overridden method
 */
@Override
protected HistoryEntry createHistoryEntry(final Project project, final long historyEntryID) throws Exception {
    Engine engine = createEngine(project);
    final Column fromColumn = project.columnModel.getColumnByName(_fromColumnName);

    // Resolve target columns, silently dropping names that do not exist in the project.
    final List<Column> toColumns = new ArrayList<Column>(_toColumnNames.length);
    for (String c : _toColumnNames) {
        Column toColumn = project.columnModel.getColumnByName(c);
        if (toColumn != null) {
            toColumns.add(toColumn);
        }
    }

    final Set<Recon.Judgment> judgments = new HashSet<Recon.Judgment>(_judgments.length);
    for (String j : _judgments) {
        judgments.add(Recon.stringToJudgment(j));
    }

    final List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
    if (fromColumn != null && !toColumns.isEmpty()) {
        final int fromCellIndex = fromColumn.getCellIndex();
        final Map<Object, Recon> cellValueToRecon = new HashMap<Object, Recon>();
        FilteredRows filteredRows = engine.getAllFilteredRows();
        try {
            // Pass 1: remember, per cell value, a recon with an accepted judgment.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                // nothing to do
                }

                @Override
                public void end(Project project) {
                // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    Cell cell = row.getCell(fromCellIndex);
                    if (cell != null && cell.value != null && cell.recon != null) {
                        if (judgments.contains(cell.recon.judgment)) {
                            cellValueToRecon.put(cell.value, cell.recon);
                        }
                    }
                    return false;
                }
            });

            // Pass 2: queue a change for each target cell whose value has a recon to copy.
            filteredRows.accept(project, new RowVisitor() {

                @Override
                public void start(Project project) {
                // nothing to do
                }

                @Override
                public void end(Project project) {
                // nothing to do
                }

                @Override
                public boolean visit(Project project, int rowIndex, Row row) {
                    for (Column column : toColumns) {
                        int cellIndex = column.getCellIndex();
                        Cell cell = row.getCell(cellIndex);
                        if (cell != null && cell.value != null) {
                            Recon reconToCopy = cellValueToRecon.get(cell.value);
                            boolean judged = cell.recon != null && cell.recon.judgment != Judgment.None;
                            if (reconToCopy != null && (!judged || _applyToJudgedCells)) {
                                Cell newCell = new Cell(cell.value, reconToCopy);
                                CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
                                cellChanges.add(cellChange);
                            }
                        }
                    }
                    return false;
                }
            });
        } catch (Exception e) {
            // NOTE(review): a failure here is only printed, and the entry is still built
            // from whatever changes were collected so far - confirm this is intended.
            e.printStackTrace();
        }
    }

    // Join target names with a separator; the single-argument join runs them together.
    String description = "Copy " + cellChanges.size() + " recon judgments from column " + _fromColumnName + " to " + StringUtils.join(_toColumnNames, ", ");
    return new HistoryEntry(historyEntryID, project, description, this, new MassChange(cellChanges, false));
}
Also used : HashMap(java.util.HashMap) CellChange(com.google.refine.model.changes.CellChange) ArrayList(java.util.ArrayList) FilteredRows(com.google.refine.browsing.FilteredRows) JSONException(org.json.JSONException) MassChange(com.google.refine.model.changes.MassChange) Project(com.google.refine.model.Project) Column(com.google.refine.model.Column) HistoryEntry(com.google.refine.history.HistoryEntry) JSONObject(org.json.JSONObject) Row(com.google.refine.model.Row) Recon(com.google.refine.model.Recon) RowVisitor(com.google.refine.browsing.RowVisitor) Cell(com.google.refine.model.Cell) Engine(com.google.refine.browsing.Engine) HashSet(java.util.HashSet) Judgment(com.google.refine.model.Recon.Judgment)

Aggregations

Recon (com.google.refine.model.Recon): 13; Cell (com.google.refine.model.Cell): 6; JSONException (org.json.JSONException): 5; JSONObject (org.json.JSONObject): 5; ReconCandidate (com.google.refine.model.ReconCandidate): 4; ArrayList (java.util.ArrayList): 4; HashMap (java.util.HashMap): 4; RowVisitor (com.google.refine.browsing.RowVisitor): 2; Column (com.google.refine.model.Column): 2; Project (com.google.refine.model.Project): 2; Row (com.google.refine.model.Row): 2; CellChange (com.google.refine.model.changes.CellChange): 2; IOException (java.io.IOException): 2; Serializable (java.io.Serializable): 2; Map (java.util.Map): 2; JSONArray (org.json.JSONArray): 2; JSONWriter (org.json.JSONWriter): 2; OdfTableCell (org.odftoolkit.odfdom.doc.table.OdfTableCell): 2; Engine (com.google.refine.browsing.Engine): 1; FilteredRows (com.google.refine.browsing.FilteredRows): 1