use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.
the class Pool method save.
/**
 * Writes this pool to the given writer in a line-oriented form.
 * <p>
 * Output layout: the {@code RefineServlet.VERSION} string on the first line,
 * then a {@code reconCount=<n>} line, then one line per recon produced by
 * {@code Recon.write}.
 *
 * @param writer destination for the serialized pool
 * @throws IOException if writing to {@code writer} fails
 */
public void save(Writer writer) throws IOException {
    // Header line: version marker.
    writer.write(RefineServlet.VERSION);
    writer.write('\n');

    // Options handed to every recon as it serializes itself.
    Properties saveOptions = new Properties();
    saveOptions.setProperty("mode", "save");
    saveOptions.put("pool", this);

    Collection<Recon> pooledRecons = recons.values();
    writer.write("reconCount=" + pooledRecons.size());
    writer.write('\n');

    for (Recon pooled : pooledRecons) {
        try {
            // A fresh JSONWriter per recon so each one is emitted as its own document.
            JSONWriter json = new JSONWriter(writer);
            pooled.write(json, saveOptions);
            writer.write('\n');
        } catch (JSONException e) {
            // Best effort: report the failure and carry on with the remaining recons.
            e.printStackTrace();
        }
    }
}
use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.
the class StandardReconConfig method createReconServiceResults.
/**
 * Builds a {@link Recon} from the result array returned by a reconciliation service.
 * <p>
 * Every result that has a {@code "name"} key becomes a candidate. When
 * {@code autoMatch} is set and the very first result (index 0) carries a true
 * {@code "match"} flag, that candidate is recorded as an automatic match.
 * If at least one candidate was added, name/type features are computed against
 * the first candidate.
 * <p>
 * A {@code JSONException} stops processing; the stack trace is printed and the
 * recon is returned in whatever state it had reached.
 *
 * @param text           the cell text that was reconciled
 * @param results        the service's result array
 * @param historyEntryID id passed to the new recon's constructor
 * @return the populated recon (never {@code null})
 */
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
    Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
    try {
        final int total = results.length();
        int accepted = 0;

        for (int index = 0; index < total; index++) {
            JSONObject entry = results.getJSONObject(index);
            if (entry.has("name")) {
                // Types may be given either as plain strings or as objects with an "id".
                JSONArray rawTypes = entry.getJSONArray("type");
                String[] typeIDs = new String[rawTypes.length()];
                for (int t = 0; t < typeIDs.length; t++) {
                    Object rawType = rawTypes.get(t);
                    if (rawType instanceof String) {
                        typeIDs[t] = (String) rawType;
                    } else {
                        typeIDs[t] = ((JSONObject) rawType).getString("id");
                    }
                }

                double score = entry.getDouble("score");
                ReconCandidate candidate = new ReconCandidate(
                        entry.getString("id"), entry.getString("name"), typeIDs, score);

                // Only the first raw result is eligible for auto-matching.
                boolean firstResult = (index == 0);
                if (autoMatch && firstResult && entry.has("match") && entry.getBoolean("match")) {
                    recon.match = candidate;
                    recon.matchRank = 0;
                    recon.judgment = Judgment.Matched;
                    recon.judgmentAction = "auto";
                }

                recon.addCandidate(candidate);
                accepted++;
            }
        }

        if (accepted > 0) {
            // Features are always measured against the top candidate.
            ReconCandidate best = recon.candidates.get(0);
            String bestName = best.name;

            recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(bestName));
            recon.setFeature(Recon.Feature_nameLevenshtein,
                    StringUtils.getLevenshteinDistance(
                            StringUtils.lowerCase(text), StringUtils.lowerCase(bestName)));
            recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, bestName));

            // Default to "no type match", then flip if the configured type is present.
            recon.setFeature(Recon.Feature_typeMatch, false);
            if (this.typeID != null) {
                for (String candidateType : best.types) {
                    if (this.typeID.equals(candidateType)) {
                        recon.setFeature(Recon.Feature_typeMatch, true);
                        break;
                    }
                }
            }
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return recon;
}
use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.
the class ExcelImporter method parseOneFile.
/**
 * Parses an Excel workbook from {@code inputStream} and feeds each sheet listed
 * in the {@code "sheets"} option to {@code TabularImportingParserBase.readTable}.
 * <p>
 * If the workbook cannot be opened, an {@code ImportException} describing the
 * problem is appended to {@code exceptions} and the method returns without
 * reading any sheet.
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream inputStream, int limit, JSONObject options, List<Exception> exceptions) {
    Workbook wb = null;

    // The header sniffing below needs to look ahead; wrap streams that cannot mark/reset.
    if (!inputStream.markSupported()) {
        inputStream = new PushbackInputStream(inputStream, 8);
    }

    try {
        if (POIXMLDocument.hasOOXMLHeader(inputStream)) {
            wb = new XSSFWorkbook(inputStream);
        } else {
            wb = new HSSFWorkbook(new POIFSFileSystem(inputStream));
        }
    } catch (IOException e) {
        exceptions.add(new ImportException("Attempted to parse as an Excel file but failed. " + "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.", e));
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        exceptions.add(new ImportException("Attempted to parse file as an Excel file but failed. " + "This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. " + "Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. " + "See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details", e));
        return;
    } catch (IllegalArgumentException e) {
        exceptions.add(new ImportException("Attempted to parse as an Excel file but failed. " + "Only Excel 97 and later formats are supported.", e));
        return;
    } catch (POIXMLException e) {
        exceptions.add(new ImportException("Attempted to parse as an Excel file but failed. " + "Invalid XML.", e));
        return;
    }

    int[] selectedSheets = JSONUtilities.getIntArray(options, "sheets");
    for (int selected : selectedSheets) {
        final Sheet currentSheet = wb.getSheetAt(selected);
        // Index of the last row (inclusive), hence the '>' comparison below.
        final int lastRowIndex = currentSheet.getLastRowNum();

        TableDataReader dataReader = new TableDataReader() {
            int rowCursor = 0;
            Map<String, Recon> reconsById = new HashMap<String, Recon>();

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                if (rowCursor > lastRowIndex) {
                    return null; // no more rows in this sheet
                }

                List<Object> cells = new ArrayList<Object>();
                org.apache.poi.ss.usermodel.Row row = currentSheet.getRow(rowCursor);
                rowCursor++;
                if (row == null) {
                    // Missing row: report it as an empty row.
                    return cells;
                }

                short cellLimit = row.getLastCellNum();
                for (short column = 0; column < cellLimit; column++) {
                    org.apache.poi.ss.usermodel.Cell sourceCell = row.getCell(column);
                    Cell converted = null;
                    if (sourceCell != null) {
                        converted = extractCell(sourceCell, reconsById);
                    }
                    cells.add(converted);
                }
                return cells;
            }
        };

        String sheetSource = fileSource + "#" + currentSheet.getSheetName();
        TabularImportingParserBase.readTable(project, metadata, job, dataReader, sheetSource, limit, options, exceptions);
    }
}
use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.
the class OdsImporter method parseOneFile.
/**
 * Parses an OpenDocument spreadsheet from {@code inputStream} and feeds each
 * table listed in the {@code "sheets"} option to
 * {@code TabularImportingParserBase.readTable}.
 * <p>
 * If the document cannot be loaded, the exception is appended to
 * {@code exceptions} and the method returns without reading any table.
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream inputStream, int limit, JSONObject options, List<Exception> exceptions) {
    OdfDocument odfDoc;
    try {
        odfDoc = OdfDocument.loadDocument(inputStream);
    } catch (Exception e) {
        // loadDocument is declared to throw the widest possible type, so we have to catch it all.
        exceptions.add(e);
        return;
    }

    List<OdfTable> tables = odfDoc.getTableList();
    int[] sheets = JSONUtilities.getIntArray(options, "sheets");
    for (int sheetIndex : sheets) {
        final OdfTable table = tables.get(sheetIndex);
        // Number of rows; valid row indices are 0 .. rowCount - 1.
        final int rowCount = table.getRowCount();

        TableDataReader dataReader = new TableDataReader() {
            int nextRow = 0;
            Map<String, Recon> reconMap = new HashMap<String, Recon>();

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                // rowCount is a count, not a last index: stop as soon as nextRow reaches it.
                // Asking for index == rowCount would yield a spurious trailing row (the ODFDOM
                // documentation states that out-of-range access expands the table).
                if (nextRow >= rowCount) {
                    return null;
                }

                List<Object> cells = new ArrayList<Object>();
                OdfTableRow row = table.getRowByIndex(nextRow++);
                if (row != null) {
                    // Likewise a count: valid cell indices are 0 .. cellCount - 1.
                    int cellCount = row.getCellCount();
                    for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {
                        Cell cell = null;
                        OdfTableCell sourceCell = row.getCellByIndex(cellIndex);
                        if (sourceCell != null) {
                            cell = extractCell(sourceCell, reconMap);
                        }
                        cells.add(cell);
                    }
                }
                return cells;
            }
        };

        TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource + "#" + table.getTableName(), limit, options, exceptions);
    }
}
use of com.google.refine.model.Recon in project OpenRefine by OpenRefine.
the class ExcelImporter method extractCell.
/**
 * Converts a POI cell into a project {@code Cell}, attaching a {@link Recon}
 * when the cell's hyperlink points at a {@code freebase.com/view} URL.
 * <p>
 * Recons are shared per extracted id through {@code reconMap}: the first cell
 * for an id creates a matched recon with batch size 1; later cells with the
 * same id reuse it and increment its {@code judgmentBatchSize}.
 *
 * @param cell     the source spreadsheet cell
 * @param reconMap recons created so far, keyed by the id taken from the URL
 * @return the converted cell, or {@code null} if the cell has no extractable value
 */
protected static Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
    Serializable value = extractCell(cell);
    if (value == null) {
        return null;
    }

    Recon recon = null;

    Hyperlink link = cell.getHyperlink();
    String url = (link != null) ? link.getAddress() : null;
    boolean isHttpUrl = url != null && (url.startsWith("http://") || url.startsWith("https://"));

    if (isHttpUrl) {
        final String marker = "freebase.com/view";
        int markerPos = url.indexOf(marker);
        if (markerPos > 0) {
            // Everything after the marker is the id, minus any query string or fragment.
            String id = url.substring(markerPos + marker.length());

            int queryPos = id.indexOf('?');
            if (queryPos > 0) {
                id = id.substring(0, queryPos);
            }

            int fragmentPos = id.indexOf('#');
            if (fragmentPos > 0) {
                id = id.substring(0, fragmentPos);
            }

            if (!reconMap.containsKey(id)) {
                // First occurrence of this id: create a pre-matched recon and remember it.
                recon = new Recon(0, null, null);
                recon.service = "import";
                recon.match = new ReconCandidate(id, value.toString(), new String[0], 100);
                recon.matchRank = 0;
                recon.judgment = Judgment.Matched;
                recon.judgmentAction = "auto";
                recon.judgmentBatchSize = 1;
                recon.addCandidate(recon.match);
                reconMap.put(id, recon);
            } else {
                // Seen before: share the recon and bump its batch size.
                recon = reconMap.get(id);
                recon.judgmentBatchSize++;
            }
        }
    }

    return new Cell(value, recon);
}
Aggregations