Search in sources :

Example 6 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class Pool method save.

/**
 * Writes this pool to the given writer.
 * <p>
 * The output starts with {@code RefineServlet.VERSION} on its own line, followed by a
 * {@code reconCount=<n>} line, followed by the output of {@code Recon.write} for every
 * pooled recon, each terminated by a newline.
 * <p>
 * A {@link JSONException} raised while writing one recon is printed to standard error and
 * the remaining recons are still processed.
 *
 * @param writer destination for the serialized pool
 * @throws IOException if the underlying writer fails
 */
public void save(Writer writer) throws IOException {
    // Header line: application version.
    writer.write(RefineServlet.VERSION);
    writer.write('\n');

    // Options handed to every Recon.write call.
    Properties saveOptions = new Properties();
    saveOptions.setProperty("mode", "save");
    saveOptions.put("pool", this);

    Collection<Recon> pooledRecons = recons.values();
    String countLine = "reconCount=" + pooledRecons.size();
    writer.write(countLine);
    writer.write('\n');

    for (Recon pooled : pooledRecons) {
        try {
            // A fresh JSONWriter is created for each recon, as in the original loop.
            pooled.write(new JSONWriter(writer), saveOptions);
            writer.write('\n');
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
}
Also used : JSONWriter(org.json.JSONWriter) JSONException(org.json.JSONException) Properties(java.util.Properties) Recon(com.google.refine.model.Recon)

Example 7 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class StandardReconConfig method createReconServiceResults.

/**
 * Builds a {@link Recon} from an array of reconciliation service results.
 * <p>
 * Results without a {@code "name"} key are skipped. Every other result becomes a
 * {@link ReconCandidate} built from its {@code "id"}, {@code "name"}, {@code "type"} and
 * {@code "score"} entries. When {@code autoMatch} is set and the very first array element
 * carries a true {@code "match"} flag, that candidate is recorded as an automatic match.
 * <p>
 * If at least one candidate was added, the name/type features of the recon are computed
 * against the first candidate. A {@link JSONException} aborts processing at the point it
 * occurs; the stack trace is printed and the recon built so far is returned.
 *
 * @param text           the cell text that was reconciled
 * @param results        the service results to convert
 * @param historyEntryID the history entry id passed to the new recon
 * @return the recon, possibly without candidates
 */
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
    final Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
    try {
        final int resultCount = results.length();
        boolean hasCandidates = false;

        for (int index = 0; index < resultCount; index++) {
            final JSONObject entry = results.getJSONObject(index);
            if (entry.has("name")) {
                // A type is either a plain string id or an object carrying an "id" key.
                final JSONArray typeArray = entry.getJSONArray("type");
                final String[] typeIDs = new String[typeArray.length()];
                for (int t = 0; t < typeIDs.length; t++) {
                    final Object rawType = typeArray.get(t);
                    if (rawType instanceof String) {
                        typeIDs[t] = (String) rawType;
                    } else {
                        typeIDs[t] = ((JSONObject) rawType).getString("id");
                    }
                }

                final double score = entry.getDouble("score");
                final String candidateId = entry.getString("id");
                final String candidateName = entry.getString("name");
                final ReconCandidate candidate = new ReconCandidate(candidateId, candidateName, typeIDs, score);

                // Only the first element of the result array may be auto-matched.
                if (autoMatch && index == 0 && entry.has("match") && entry.getBoolean("match")) {
                    recon.match = candidate;
                    recon.matchRank = 0;
                    recon.judgment = Judgment.Matched;
                    recon.judgmentAction = "auto";
                }

                recon.addCandidate(candidate);
                hasCandidates = true;
            }
        }

        if (hasCandidates) {
            // Features are computed against the first candidate only.
            final ReconCandidate best = recon.candidates.get(0);
            final String bestName = best.name;

            recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(bestName));

            final int editDistance = StringUtils.getLevenshteinDistance(
                    StringUtils.lowerCase(text), StringUtils.lowerCase(bestName));
            recon.setFeature(Recon.Feature_nameLevenshtein, editDistance);

            recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, bestName));

            recon.setFeature(Recon.Feature_typeMatch, false);
            if (typeID != null) {
                for (String candidateType : best.types) {
                    if (typeID.equals(candidateType)) {
                        recon.setFeature(Recon.Feature_typeMatch, true);
                        break;
                    }
                }
            }
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return recon;
}
Also used : JSONObject(org.json.JSONObject) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) Recon(com.google.refine.model.Recon) ReconCandidate(com.google.refine.model.ReconCandidate)

Example 8 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class ExcelImporter method parseOneFile.

/**
 * Parses an Excel workbook and feeds each selected sheet to
 * {@code TabularImportingParserBase.readTable}.
 * <p>
 * The workbook is opened as {@code XSSFWorkbook} when the stream has an OOXML header and
 * as {@code HSSFWorkbook} otherwise. Any failure to open it is wrapped in an
 * {@code ImportException}, appended to {@code exceptions}, and the method returns without
 * reading any sheet. The sheets to read are taken from the {@code "sheets"} int array in
 * {@code options}.
 *
 * @param project     passed through to {@code readTable}
 * @param metadata    passed through to {@code readTable}
 * @param job         passed through to {@code readTable}
 * @param fileSource  source label; the sheet name is appended after a {@code #}
 * @param inputStream the workbook bytes
 * @param limit       passed through to {@code readTable}
 * @param options     importer options, including the {@code "sheets"} array
 * @param exceptions  collector for problems encountered while importing
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream inputStream, int limit, JSONObject options, List<Exception> exceptions) {
    // Streams without mark support are wrapped in a pushback stream before header detection
    // (presumably so the sniffed header bytes can be unread).
    final InputStream source = inputStream.markSupported()
            ? inputStream
            : new PushbackInputStream(inputStream, 8);

    Workbook workbook;
    try {
        if (POIXMLDocument.hasOOXMLHeader(source)) {
            workbook = new XSSFWorkbook(source);
        } else {
            workbook = new HSSFWorkbook(new POIFSFileSystem(source));
        }
    } catch (IOException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                + "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
                e));
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        exceptions.add(new ImportException(
                "Attempted to parse file as an Excel file but failed. "
                + "This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. "
                + "Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. "
                + "See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
                e));
        return;
    } catch (IllegalArgumentException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                + "Only Excel 97 and later formats are supported.",
                e));
        return;
    } catch (POIXMLException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                + "Invalid XML.",
                e));
        return;
    }

    final int[] selectedSheets = JSONUtilities.getIntArray(options, "sheets");
    for (int selected : selectedSheets) {
        final Sheet sheet = workbook.getSheetAt(selected);
        // getLastRowNum() is the index of the last row, hence the inclusive bound below.
        final int lastRowIndex = sheet.getLastRowNum();

        TableDataReader rowReader = new TableDataReader() {

            int rowCursor = 0;

            // Shared across all cells of this sheet and handed to extractCell.
            Map<String, Recon> reconsById = new HashMap<String, Recon>();

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                if (rowCursor > lastRowIndex) {
                    return null;
                }

                final List<Object> rowCells = new ArrayList<Object>();
                final org.apache.poi.ss.usermodel.Row sourceRow = sheet.getRow(rowCursor++);
                if (sourceRow == null) {
                    // Missing rows are reported as empty rows rather than end-of-data.
                    return rowCells;
                }

                final int cellLimit = sourceRow.getLastCellNum();
                for (int column = 0; column < cellLimit; column++) {
                    final org.apache.poi.ss.usermodel.Cell sourceCell = sourceRow.getCell(column);
                    final Cell converted = (sourceCell == null) ? null : extractCell(sourceCell, reconsById);
                    rowCells.add(converted);
                }
                return rowCells;
            }
        };

        TabularImportingParserBase.readTable(project, metadata, job, rowReader, fileSource + "#" + sheet.getSheetName(), limit, options, exceptions);
    }
}
Also used : ArrayList(java.util.ArrayList) POIXMLException(org.apache.poi.POIXMLException) PushbackInputStream(java.io.PushbackInputStream) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) Cell(com.google.refine.model.Cell) IOException(java.io.IOException) Workbook(org.apache.poi.ss.usermodel.Workbook) HSSFWorkbook(org.apache.poi.hssf.usermodel.HSSFWorkbook) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) JSONObject(org.json.JSONObject) Sheet(org.apache.poi.ss.usermodel.Sheet) Recon(com.google.refine.model.Recon) HashMap(java.util.HashMap) Map(java.util.Map)

Example 9 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class OdsImporter method parseOneFile.

/**
 * Parses an OpenDocument spreadsheet and feeds each selected table to
 * {@code TabularImportingParserBase.readTable}.
 * <p>
 * If the document cannot be loaded, the exception is appended to {@code exceptions} and
 * the method returns without reading any table. The tables to read are taken from the
 * {@code "sheets"} int array in {@code options}.
 *
 * @param project     passed through to {@code readTable}
 * @param metadata    passed through to {@code readTable}
 * @param job         passed through to {@code readTable}
 * @param fileSource  source label; the table name is appended after a {@code #}
 * @param inputStream the document bytes
 * @param limit       passed through to {@code readTable}
 * @param options     importer options, including the {@code "sheets"} array
 * @param exceptions  collector for problems encountered while importing
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream inputStream, int limit, JSONObject options, List<Exception> exceptions) {
    OdfDocument odfDoc;
    try {
        odfDoc = OdfDocument.loadDocument(inputStream);
    } catch (Exception e) {
        // The broad catch is deliberate: the loader gives us nothing narrower to catch.
        exceptions.add(e);
        return;
    }
    List<OdfTable> tables = odfDoc.getTableList();
    int[] sheets = JSONUtilities.getIntArray(options, "sheets");
    for (int sheetIndex : sheets) {
        final OdfTable table = tables.get(sheetIndex);
        // A count, not a last index: valid row indices are 0 .. rowCount - 1.
        final int rowCount = table.getRowCount();
        TableDataReader dataReader = new TableDataReader() {

            int nextRow = 0;

            // Shared across all cells of this table and handed to extractCell.
            Map<String, Recon> reconMap = new HashMap<String, Recon>();

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                // Stop at rowCount, not after it: asking the table for a row index beyond its
                // range makes it grow, which previously yielded one phantom trailing row.
                if (nextRow >= rowCount) {
                    return null;
                }
                List<Object> cells = new ArrayList<Object>();
                OdfTableRow row = table.getRowByIndex(nextRow++);
                if (row != null) {
                    int lastCell = row.getCellCount();
                    // NOTE(review): the inclusive bound reads one index past getCellCount().
                    // It is left unchanged because getCellCount() appears to exclude covered
                    // (merged) cells, so a stricter bound could drop real cells - confirm
                    // against the ODF Toolkit documentation before tightening.
                    for (int cellIndex = 0; cellIndex <= lastCell; cellIndex++) {
                        Cell cell = null;
                        OdfTableCell sourceCell = row.getCellByIndex(cellIndex);
                        if (sourceCell != null) {
                            cell = extractCell(sourceCell, reconMap);
                        }
                        cells.add(cell);
                    }
                }
                return cells;
            }
        };
        TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource + "#" + table.getTableName(), limit, options, exceptions);
    }
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) OdfTableCell(org.odftoolkit.odfdom.doc.table.OdfTableCell) OdfDocument(org.odftoolkit.odfdom.doc.OdfDocument) OdfTable(org.odftoolkit.odfdom.doc.table.OdfTable) JSONObject(org.json.JSONObject) Recon(com.google.refine.model.Recon) HashMap(java.util.HashMap) Map(java.util.Map) Cell(com.google.refine.model.Cell) OdfTableRow(org.odftoolkit.odfdom.doc.table.OdfTableRow)

Example 10 with Recon

use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.

the class ExcelImporter method extractCell.

/**
 * Converts a spreadsheet cell into a Refine {@link Cell}, attaching a {@link Recon} when
 * the cell's hyperlink points at a {@code freebase.com/view} URL.
 * <p>
 * The id is the part of the URL after {@code freebase.com/view}, cut at the first
 * {@code ?} and then at the first {@code #} when these occur after the first character.
 * Cells that link to the same id share one {@code Recon} through {@code reconMap}; each
 * reuse increments its {@code judgmentBatchSize}.
 *
 * @param cell     the source spreadsheet cell
 * @param reconMap recons already created for this import, keyed by id; updated in place
 * @return the converted cell, or {@code null} when the single-argument
 *         {@code extractCell} yields no value
 */
protected static Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
    final Serializable value = extractCell(cell);
    if (value == null) {
        return null;
    }

    Recon recon = null;

    final Hyperlink link = cell.getHyperlink();
    final String url = (link == null) ? null : link.getAddress();
    final boolean isWebUrl = url != null && (url.startsWith("http://") || url.startsWith("https://"));

    if (isWebUrl) {
        final String marker = "freebase.com/view";
        final int markerAt = url.indexOf(marker);
        if (markerAt > 0) {
            String id = url.substring(markerAt + marker.length());

            // Drop the query string, then the fragment.
            final int queryAt = id.indexOf('?');
            if (queryAt > 0) {
                id = id.substring(0, queryAt);
            }
            final int fragmentAt = id.indexOf('#');
            if (fragmentAt > 0) {
                id = id.substring(0, fragmentAt);
            }

            if (!reconMap.containsKey(id)) {
                // First sighting of this id: create a recon already judged as matched.
                final ReconCandidate imported = new ReconCandidate(id, value.toString(), new String[0], 100);
                recon = new Recon(0, null, null);
                recon.service = "import";
                recon.match = imported;
                recon.matchRank = 0;
                recon.judgment = Judgment.Matched;
                recon.judgmentAction = "auto";
                recon.judgmentBatchSize = 1;
                recon.addCandidate(recon.match);
                reconMap.put(id, recon);
            } else {
                recon = reconMap.get(id);
                recon.judgmentBatchSize++;
            }
        }
    }

    return new Cell(value, recon);
}
Also used : Serializable(java.io.Serializable) Recon(com.google.refine.model.Recon) Cell(com.google.refine.model.Cell) ReconCandidate(com.google.refine.model.ReconCandidate) Hyperlink(org.apache.poi.common.usermodel.Hyperlink)

Aggregations

Recon (com.google.refine.model.Recon): 13, Cell (com.google.refine.model.Cell): 6, JSONException (org.json.JSONException): 5, JSONObject (org.json.JSONObject): 5, ReconCandidate (com.google.refine.model.ReconCandidate): 4, ArrayList (java.util.ArrayList): 4, HashMap (java.util.HashMap): 4, RowVisitor (com.google.refine.browsing.RowVisitor): 2, Column (com.google.refine.model.Column): 2, Project (com.google.refine.model.Project): 2, Row (com.google.refine.model.Row): 2, CellChange (com.google.refine.model.changes.CellChange): 2, IOException (java.io.IOException): 2, Serializable (java.io.Serializable): 2, Map (java.util.Map): 2, JSONArray (org.json.JSONArray): 2, JSONWriter (org.json.JSONWriter): 2, OdfTableCell (org.odftoolkit.odfdom.doc.table.OdfTableCell): 2, Engine (com.google.refine.browsing.Engine): 1, FilteredRows (com.google.refine.browsing.FilteredRows): 1