Use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.
From the class PreviewExtendDataCommand, method doPost.
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!hasValidCSRFToken(request)) {
        respondCSRFError(response);
        return;
    }
    try {
        Project project = getProject(request);
        String columnName = request.getParameter("columnName");
        String rowIndicesString = request.getParameter("rowIndices");
        if (rowIndicesString == null) {
            respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
            return;
        }
        String jsonString = request.getParameter("extension");
        DataExtensionConfig config = DataExtensionConfig.reconstruct(jsonString);
        List<Integer> rowIndices = ParsingUtilities.mapper.readValue(rowIndicesString, new TypeReference<List<Integer>>() {
        });
        int length = rowIndices.size();
        Column column = project.columnModel.getColumnByName(columnName);
        int cellIndex = column.getCellIndex();
        // get the endpoint to extract data from
        String endpoint = null;
        ReconConfig cfg = column.getReconConfig();
        if (cfg != null && cfg instanceof StandardReconConfig) {
            StandardReconConfig scfg = (StandardReconConfig) cfg;
            endpoint = scfg.service;
        } else {
            respond(response,
                    "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
            return;
        }
        // collect the matched entity names and ids of the selected rows
        List<String> topicNames = new ArrayList<String>();
        List<String> topicIds = new ArrayList<String>();
        Set<String> ids = new HashSet<String>();
        for (int i = 0; i < length; i++) {
            int rowIndex = rowIndices.get(i);
            if (rowIndex >= 0 && rowIndex < project.rows.size()) {
                Row row = project.rows.get(rowIndex);
                Cell cell = row.getCell(cellIndex);
                if (cell != null && cell.recon != null && cell.recon.match != null) {
                    topicNames.add(cell.recon.match.name);
                    topicIds.add(cell.recon.match.id);
                    ids.add(cell.recon.match.id);
                } else {
                    topicNames.add(null);
                    topicIds.add(null);
                    ids.add(null);
                }
            }
        }
        // fetch the extension preview from the reconciliation service
        Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
        ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(config, endpoint);
        Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
        // build the preview rows: one output row per fetched data row,
        // plus a placeholder row for cells that could not be extended
        List<List<Object>> rows = new ArrayList<>();
        for (int r = 0; r < topicNames.size(); r++) {
            String id = topicIds.get(r);
            String topicName = topicNames.get(r);
            if (id != null && map.containsKey(id)) {
                DataExtension ext = map.get(id);
                boolean first = true;
                if (ext.data.length > 0) {
                    for (Object[] row : ext.data) {
                        List<Object> jsonRow = new ArrayList<>();
                        if (first) {
                            jsonRow.add(topicName);
                            first = false;
                        } else {
                            jsonRow.add(null);
                        }
                        for (Object cell : row) {
                            jsonRow.add(cell);
                        }
                        rows.add(jsonRow);
                    }
                    continue;
                }
            }
            List<Object> supplement = new ArrayList<>();
            if (id != null) {
                supplement.add(new ReconCandidate(id, topicName, new String[0], 100));
            } else {
                supplement.add("<not reconciled>");
            }
            rows.add(supplement);
        }
        respondJSON(response, new PreviewResponse(job.columns, rows));
    } catch (Exception e) {
        respondException(response, e);
    }
}
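In essence, doPost deserializes the "extension" request parameter into a DataExtensionConfig and runs a ReconciledDataExtensionJob against the column's reconciliation endpoint. The following minimal sketch exercises that same pair of classes directly, outside the servlet, assuming the API shown in the snippet; the JSON payload shape, the property id P569, and the import paths are illustrative assumptions rather than values taken from the code above.

// Minimal sketch: run a data-extension preview programmatically, mirroring the
// call sequence in doPost. Import paths and the JSON payload shape are assumed.
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import com.google.refine.model.ReconCandidate;
import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtensionConfig;

public class DataExtensionPreviewSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical extension config: one property to fetch per matched entity.
        String json = "{ \"properties\" : [ { \"id\" : \"P569\", \"name\" : \"date of birth\" } ] }";
        DataExtensionConfig config = DataExtensionConfig.reconstruct(json);

        // Ids of already-reconciled cells (illustrative value).
        Set<String> ids = new HashSet<>();
        ids.add("Q42");

        // Same call sequence as the command: build a job against the column's
        // reconciliation endpoint, then extend the given ids.
        Map<String, ReconCandidate> reconCandidateMap = new HashMap<>();
        ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(config,
                "https://wikidata.reconci.link/en/api");
        Map<String, DataExtension> extensions = job.extend(ids, reconCandidateMap);
        System.out.println("Extended " + extensions.size() + " entities");
    }
}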
Use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.
From the class NewEntityLibrary, method updateReconciledCells.
/**
 * Changes the "new" reconciled cells to their allocated ids for later use.
 *
 * @param reset
 *            set to true to revert the operation (set the cells back to "new")
 */
public void updateReconciledCells(Project project, boolean reset) {
    Set<Integer> impactedColumns = new HashSet<>();
    for (Row row : project.rows) {
        for (int i = 0; i != row.cells.size(); i++) {
            Cell cell = row.cells.get(i);
            if (cell == null || cell.recon == null) {
                continue;
            }
            Recon recon = cell.recon;
            boolean changed = false;
            if (Recon.Judgment.New.equals(recon.judgment) && !reset && map.containsKey(recon.id)) {
                // Promote the "new" cell to a match against its freshly allocated id
                recon.judgment = Recon.Judgment.Matched;
                recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(), new String[0], 100);
                recon.addCandidate(recon.match);
                changed = true;
            } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset && map.containsKey(recon.id)) {
                // Revert: drop the candidate added above and mark the cell as "new" again
                recon.judgment = Recon.Judgment.New;
                if (recon.candidates != null) {
                    recon.candidates.remove(recon.candidates.size() - 1);
                }
                recon.match = null;
                changed = true;
            }
            if (changed) {
                impactedColumns.add(i);
                // Compute features
                Column column = project.columnModel.getColumnByCellIndex(i);
                ReconConfig config = column.getReconConfig();
                if (config instanceof StandardReconConfig) {
                    StandardReconConfig stdConfig = (StandardReconConfig) config;
                    if (cell.getValue() instanceof String) {
                        stdConfig.computeFeatures(recon, (String) cell.getValue());
                    }
                }
            }
        }
    }
    // Update reconciliation statistics for impacted columns
    for (Integer colId : impactedColumns) {
        Column column = project.columnModel.getColumnByCellIndex(colId);
        column.setReconStats(ReconStats.create(project, colId));
    }
}
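For orientation, a typical call sequence from the Wikibase upload code might look like the sketch below, assuming the library's map from internal recon ids to freshly created entity ids has already been filled in. The setter name setId is an assumption for illustration and may differ between versions; only the updateReconciledCells calls are grounded in the code above and the test below.

// Hedged sketch of how updateReconciledCells might be driven after an upload.
// The setter used to populate the id map ("setId") is an assumption.
public static void applyAllocatedIds(Project project) {
    NewEntityLibrary library = new NewEntityLibrary();
    // Record that the cell reconciled as "new" with internal recon id 3289
    // was allocated Q384 on the target Wikibase (illustrative values).
    library.setId(3289L, "Q384");

    // Rewrite the "new" cells into matches against their allocated ids...
    library.updateReconciledCells(project, false);
    // ...or revert them back to "new", e.g. when undoing the upload.
    library.updateReconciledCells(project, true);
}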
Use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.
From the class WikitextImporter, method parseOneFile.
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, Reader reader, int limit, ObjectNode options, List<Exception> exceptions) {
    // Set up a simple wiki configuration
    ParserConfig parserConfig = new SimpleParserConfig();
    try {
        // Encoding validation
        WikitextEncodingValidator v = new WikitextEncodingValidator();
        String wikitext = CharStreams.toString(reader);
        String title = "Page title";
        ValidatedWikitext validated = v.validate(parserConfig, wikitext, title);
        // Pre-processing
        WikitextPreprocessor prep = new WikitextPreprocessor(parserConfig);
        WtPreproWikitextPage prepArticle = (WtPreproWikitextPage) prep.parseArticle(validated, title, false);
        // Parsing
        PreprocessedWikitext ppw = PreprocessorToParserTransformer.transform(prepArticle);
        WikitextParser parser = new WikitextParser(parserConfig);
        WtParsedWikitextPage parsedArticle;
        parsedArticle = (WtParsedWikitextPage) parser.parseArticle(ppw, title);
        // Compile the retrieved page
        boolean blankSpanningCells = JSONUtilities.getBoolean(options, "blankSpanningCells", true);
        boolean includeRawTemplates = JSONUtilities.getBoolean(options, "includeRawTemplates", false);
        boolean parseReferences = JSONUtilities.getBoolean(options, "parseReferences", true);
        final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates);
        vs.go(parsedArticle);
        WikiTableDataReader dataReader = new WikiTableDataReader(vs, parseReferences);
        // Reconcile if needed
        String wikiUrl = JSONUtilities.getString(options, "wikiUrl", null);
        // Wikidata reconciliation endpoint, hardcoded because the user might not have it in their services
        String reconUrl = JSONUtilities.getString(options, "reconService", "https://wikidata.reconci.link/en/api");
        StandardReconConfig cfg = getReconConfig(reconUrl);
        if (wikiUrl != null) {
            dataReader.reconcileToQids(wikiUrl, cfg);
        }
        // Set metadata
        if (vs.caption != null && vs.caption.length() > 0) {
            metadata.setName(vs.caption);
            // TODO this does not seem to do anything - maybe we need to pass it to OpenRefine in some other way?
        }
        TabularImportingParserBase.readTable(project, job, dataReader, limit, options, exceptions);
        // Add reconciliation statistics
        if (dataReader.columnReconciled != null) {
            for (int i = 0; i != dataReader.columnReconciled.size(); i++) {
                if (dataReader.columnReconciled.get(i)) {
                    Column col = project.columnModel.columns.get(i);
                    col.setReconStats(ReconStats.create(project, i));
                    col.setReconConfig(cfg);
                }
            }
        }
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (ParseException e1) {
        exceptions.add(e1);
        e1.printStackTrace();
    }
}
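The helper getReconConfig(reconUrl) is not shown in this snippet. A comparable configuration for the hardcoded Wikidata endpoint could be built with the StandardReconConfig constructor that the tests below use; in the sketch that follows, the identifier/schema space URIs, the Q5/"human" type restriction, and the parameter interpretation are assumptions inferred from that constructor, not the importer's actual values.

// Hedged sketch of a StandardReconConfig for the Wikidata endpoint above.
// The constructor arity follows the tests later on this page; parameter
// meanings and the concrete URI/type values are assumptions.
import java.util.Collections;

import com.google.refine.model.recon.StandardReconConfig;

public class WikidataReconConfigSketch {

    public static StandardReconConfig wikidataConfig() {
        return new StandardReconConfig(
                "https://wikidata.reconci.link/en/api",   // reconciliation service URL
                "http://www.wikidata.org/entity/",        // identifier space (assumed)
                "http://www.wikidata.org/prop/direct/",   // schema space (assumed)
                "Q5",                                     // type id to reconcile against (assumed)
                "human",                                  // type name (assumed)
                true,                                     // auto-match confident candidates
                Collections.emptyList());                 // no extra property columns
    }
}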
Use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.
From the class NewEntityLibraryTest, method testUpdateReconciledCells.
@Test
public void testUpdateReconciledCells() {
    Project project = createCSVProject(TestingData.inceptionWithNewCsv);
    StandardReconConfig config = new StandardReconConfig("http://my.endpoint", "http://my.schema", "http://my.schema",
            "Q5", "human", true, Collections.emptyList());
    project.columnModel.columns.get(0).setReconConfig(config);
    project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
    project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
    project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));
    isNewTo(3289L, project.rows.get(0).cells.get(0));
    isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
    isNewTo(1234L, project.rows.get(2).cells.get(0));
    library.updateReconciledCells(project, false);
    Cell firstCell = project.rows.get(0).cells.get(0);
    isMatchedTo("Q384", firstCell);
    assertTrue((Boolean) firstCell.recon.getFeature(Recon.Feature_nameMatch));
    isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
    isMatchedTo("Q345", project.rows.get(2).cells.get(0));
    assertTrue(project.rows.get(2).cells.get(0).recon.getFeature(Recon.Feature_nameLevenshtein).equals(0));
    library.updateReconciledCells(project, true);
    isNewTo(3289L, project.rows.get(0).cells.get(0));
    isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
    isNewTo(1234L, project.rows.get(2).cells.get(0));
}
Use of com.google.refine.model.recon.StandardReconConfig in project OpenRefine by OpenRefine.
From the class ReconJudgeOneCellCommandTest, method setUp.
@BeforeMethod
public void setUp() {
    project = createCSVProject("reconciled column,unreconciled column\n" + "a,b\n" + "c,d\n");
    Column reconciled = project.columnModel.columns.get(0);
    ReconConfig config = new StandardReconConfig("http://my.recon.service/api", "http://my.recon.service/rdf/space",
            "http://my.recon.service/rdf/schema", "type3894", "octopus", true, Collections.emptyList(), 5);
    reconciled.setReconConfig(config);
    request = mock(HttpServletRequest.class);
    response = mock(HttpServletResponse.class);
    when(request.getParameter("project")).thenReturn(String.valueOf(project.id));
    when(request.getParameter("csrf_token")).thenReturn(Command.csrfFactory.getFreshToken());
    writer = mock(PrintWriter.class);
    try {
        when(response.getWriter()).thenReturn(writer);
    } catch (IOException e1) {
        Assert.fail();
    }
    command = new ReconJudgeOneCellCommand();
}
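A test method building on this setUp could drive the command roughly as sketched below; the request parameter names ("row", "cell", "judgment") are assumptions about ReconJudgeOneCellCommand's API rather than values taken from the snippet.

// Hypothetical test continuing the class above; parameter names are assumed.
@Test
public void testMarkCellAsNew() throws Exception {
    when(request.getParameter("row")).thenReturn("0");
    when(request.getParameter("cell")).thenReturn("0");
    when(request.getParameter("judgment")).thenReturn("new");

    command.doPost(request, response);

    Cell cell = project.rows.get(0).cells.get(0);
    Assert.assertEquals(cell.recon.judgment, Recon.Judgment.New);
}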