Example use of org.globalbioticinteractions.doi.MalformedDOIException in the project eol-globi-data by jhpoelen.
From class CitationUtil, method getDOI.
/**
 * Determines the DOI associated with a dataset.
 *
 * <p>A non-blank "doi" property takes precedence and is parsed with
 * {@code DOI.create}. When no "doi" property is set, a Zenodo DOI is derived
 * from the archive URI, but only when that URI starts with
 * {@code ZENODO_URL_PREFIX}; the path segment directly following the prefix
 * is used as the record id.
 *
 * @param dataset dataset to inspect
 * @return the DOI, or {@code null} when the configured DOI is malformed or
 *         no DOI is configured and none can be derived from the archive URI
 */
public static DOI getDOI(Dataset dataset) {
    final String doi = dataset.getOrDefault("doi", "");
    if (StringUtils.isNotBlank(doi)) {
        try {
            return DOI.create(doi);
        } catch (MalformedDOIException e) {
            LOG.warn("found malformed doi [" + doi + "]", e);
            return null;
        }
    }

    // No explicit DOI: fall back to the archive location. The URI may be
    // absent, and only Zenodo record URIs carry an id a DOI can be built from.
    final URI archiveURI = dataset.getArchiveURI();
    final String archive = archiveURI == null ? null : archiveURI.toString();
    if (!StringUtils.startsWith(archive, ZENODO_URL_PREFIX)) {
        return null;
    }

    final String afterPrefix = StringUtils.removeStart(archive, ZENODO_URL_PREFIX);
    final String recordId = StringUtils.substringBefore(afterPrefix, "/");
    return StringUtils.isBlank(recordId)
            ? null
            : new DOI("5281", "zenodo." + recordId);
}
Example use of org.globalbioticinteractions.doi.MalformedDOIException in the project eol-globi-data by jhpoelen.
From class DOIResolverImpl, method requestLinks.
/**
 * Looks up a DOI for each reference by querying the "/works" endpoint under
 * {@code baseURL}, asking for the single highest-scoring bibliographic match.
 *
 * <p>References whose lookup fails with a malformed URI or a malformed DOI
 * are logged and left out of the result; the remaining references are still
 * processed.
 *
 * @param references citation strings to resolve
 * @return map, sorted by reference, from reference to the value returned by
 *         {@code getMostRelevantDOIMatch}
 * @throws IOException propagated from the underlying request
 */
private Map<String, DOI> requestLinks(Collection<String> references) throws IOException {
    final Map<String, DOI> matches = new TreeMap<>();
    for (final String citation : references) {
        try {
            final URIBuilder query = new URIBuilder(baseURL + "/works")
                    .addParameter("sort", "score")
                    .addParameter("order", "desc")
                    .addParameter("rows", "1")
                    .addParameter("select", "DOI,score")
                    .addParameter("query.bibliographic", citation);

            final HttpGet request = new HttpGet(query.build());
            request.setHeader("Content-Type", "application/json");

            final DOI bestMatch = getMostRelevantDOIMatch(request);
            matches.put(citation, bestMatch);
        } catch (URISyntaxException e) {
            LOG.warn("unexpected malformed URI on resolving crossref dois", e);
        } catch (MalformedDOIException e) {
            LOG.warn("received malformed doi from cross ref", e);
        }
    }
    return matches;
}
Example use of org.globalbioticinteractions.doi.MalformedDOIException in the project eol-globi-data by jhpoelen.
From class DOIResolverCache, method init.
/**
 * Initializes the citation-to-DOI cache.
 *
 * <p>If the database opened via {@code initDb("doiCache")} already contains a
 * "doiCache" collection, nothing is built. Otherwise the tab-separated
 * content of {@code reader} is pumped into a new tree map: column 0 of each
 * line is read as the DOI and column 1 as the citation. Lines with a blank or
 * malformed DOI, or with a blank or missing citation, are skipped.
 *
 * @param reader source of tab-separated (doi, citation) lines
 * @throws PropertyEnricherException declared by this method; not thrown directly in the visible code
 * @throws IOException declared by this method; read errors during pumping are logged instead
 */
public void init(final Reader reader) throws PropertyEnricherException, IOException {
    DB db = initDb("doiCache");
    StopWatch watch = new StopWatch();
    watch.start();
    final CSVParse parser = CSVTSVUtil.createTSVParser(reader);
    if (db.exists("doiCache")) {
        // NOTE(review): doiCitationMap is not assigned in this branch - confirm
        // that the existing map is bound elsewhere before lookups happen.
        LOG.info("reusing existing doi cache...");
    } else {
        LOG.info("doi cache building...");
        doiCitationMap = db.createTreeMap("doiCache")
                .pumpPresort(300000)
                .pumpIgnoreDuplicates()
                .pumpSource(new Iterator<Fun.Tuple2<String, DOI>>() {
                    // Next valid (citation, doi) pair, read ahead by hasNext(); null when none is buffered.
                    private Fun.Tuple2<String, DOI> pending = null;

                    // Returns column 1 of the line, or null when the line has no second column.
                    private String citationOf(String[] line) {
                        return line.length > 1 ? line[1] : null;
                    }

                    // Parses column 0 of the line; null when the column is missing, blank or malformed.
                    private DOI doiOf(String[] line) {
                        final String doiString = line.length > 0 ? line[0] : null;
                        if (StringUtils.isBlank(doiString)) {
                            return null;
                        }
                        try {
                            return DOI.create(doiString);
                        } catch (MalformedDOIException e) {
                            LOG.warn("skipping malformed doi [" + doiString + "]", e);
                            return null;
                        }
                    }

                    @Override
                    public boolean hasNext() {
                        try {
                            // Read ahead until a usable line is found or the input ends.
                            while (pending == null) {
                                final String[] line = parser.getLine();
                                if (line == null) {
                                    break;
                                }
                                final DOI doi = doiOf(line);
                                final String citation = citationOf(line);
                                if (doi != null && StringUtils.isNotBlank(citation)) {
                                    pending = new Fun.Tuple2<>(citation, doi);
                                }
                            }
                            return pending != null;
                        } catch (IOException e) {
                            LOG.error("problem reading", e);
                            return false;
                        }
                    }

                    @Override
                    public Fun.Tuple2<String, DOI> next() {
                        // Honor the Iterator contract even if the caller skipped hasNext().
                        if (!hasNext()) {
                            throw new java.util.NoSuchElementException("no more doi cache entries");
                        }
                        final Fun.Tuple2<String, DOI> entry = pending;
                        pending = null;
                        return entry;
                    }
                })
                .make();
        db.commit();
        watch.stop();
        LOG.info("doi cache built in [" + watch.getTime(TimeUnit.SECONDS) + "] s.");
    }
}
Example use of org.globalbioticinteractions.doi.MalformedDOIException in the project eol-globi-data by jhpoelen.
From class DatasetImporterForPensoft, method findCitation.
/**
 * Runs a SPARQL query for a single journal article (selected by
 * {@code bindStatement}) and formats the result as a citation string.
 *
 * <p>The citation joins authors list, publication year, title, journal name
 * and the DOI rendered as a URI, separated by ". ".
 *
 * @param openBiodivClient client used to execute the query
 * @param bindStatement SPARQL fragment inserted at the start of the WHERE clause
 * @return the citation, or {@code null} when the result carries no DOI
 * @throws IOException when the query fails or the returned DOI is malformed
 */
private static String findCitation(SparqlClient openBiodivClient, String bindStatement) throws IOException {
    final String sparql = "PREFIX fabio: <http://purl.org/spar/fabio/>\n"
            + "PREFIX prism: <http://prismstandard.org/namespaces/basic/2.0/>\n"
            + "PREFIX doco: <http://purl.org/spar/doco/>\n"
            + "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n"
            + "SELECT "
            + " ?article "
            + " ?title "
            + " ?doi "
            + " (group_concat(distinct ?authorName; separator=\", \") as ?authorsList) "
            + " ( REPLACE(str(?pubDate), \"(\\\\d*)-.*\", \"$1\") as ?pubYear) "
            + " ?journalName "
            + "WHERE { \n"
            + bindStatement
            + " ?article a fabio:JournalArticle.\n"
            + " ?article prism:doi ?doi.\n"
            + " ?article dc:title ?title.\n"
            + " ?article prism:publicationDate ?pubDate.\n"
            + " ?article <http://purl.org/vocab/frbr/core#realizationOf> ?paper.\n"
            + " ?paper dc:creator ?author.\n"
            + " ?journal <http://purl.org/vocab/frbr/core#part> ?article.\n"
            + " ?journal a fabio:Journal.\n"
            + " ?journal <http://www.w3.org/2004/02/skos/core#prefLabel> ?journalName.\n"
            + " ?author <http://www.w3.org/2000/01/rdf-schema#label> ?authorName.\n"
            + "} GROUP BY ?article ?title ?doi ?pubDate ?journalName \n"
            + " LIMIT 1";

    // Declared outside the try so the offending value can be reported on failure.
    String doi = null;
    try {
        final LabeledCSVParser parser = openBiodivClient.query(sparql);
        // Advance to the first (and, due to LIMIT 1, only) result row.
        parser.getLine();
        doi = parser.getValueByLabel("doi");
        if (StringUtils.isBlank(doi)) {
            return null;
        }
        final String doiURIString = DOI.create(doi).toURI().toString();
        return StringUtils.join(
                Arrays.asList(
                        parser.getValueByLabel("authorsList"),
                        parser.getValueByLabel("pubYear"),
                        parser.getValueByLabel("title"),
                        parser.getValueByLabel("journalName"),
                        doiURIString),
                ". ");
    } catch (MalformedDOIException e) {
        throw new IOException("malformed doi [" + doi + "]", e);
    }
}
Aggregations