Use of ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder in the Gemma project by PavlidisLab.
Class ExpressionExperimentPrimaryPubCli, method doWork.
/**
 * Looks up a bibliographic reference for each selected expression experiment, stores it as the
 * experiment's primary publication, and logs a summary of the outcome.
 * <p>
 * For every experiment the reference is first fetched through {@code PubMedXMLFetcher} using the
 * ID registered under the experiment's short name in {@code pubmedIds}. When no ID is registered
 * or nothing is retrieved, {@code ExpressionExperimentBibRefFinder.locatePrimaryReference} is
 * tried instead. Experiments for which no reference can be found are recorded and skipped; a
 * failed persist/update is logged and does not stop the remaining experiments.
 *
 * @param args command-line arguments, handed to {@code processCommandLine}
 * @return the exception reported by {@code processCommandLine}, or {@code null} once all
 *         experiments have been processed
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = processCommandLine(args);
    if (err != null) {
        return err;
    }
    ExpressionExperimentService ees = this.getBean(ExpressionExperimentService.class);
    Persister ph = this.getPersisterHelper();
    PubMedXMLFetcher fetcher = new PubMedXMLFetcher();

    // Short names of the experiments that fall into each outcome; reported in the summary.
    Collection<String> nullPubCount = new ArrayList<>();
    Collection<String> samePubCount = new ArrayList<>();
    Collection<String> diffPubCount = new ArrayList<>();
    Collection<String> failedEe = new ArrayList<>();
    ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder();

    for (BioAssaySet bioassay : expressionExperiments) {
        if (!(bioassay instanceof ExpressionExperiment)) {
            log.info(bioassay.getName() + " is not an ExpressionExperiment");
            continue;
        }
        ExpressionExperiment experiment = (ExpressionExperiment) bioassay;
        if (experiment.getPrimaryPublication() == null) {
            log.warn(experiment + " has no existing primary publication");
        }
        experiment = ees.thawLite(experiment);
        String shortName = experiment.getShortName();

        // Only query PubMed when an ID is registered for this experiment; otherwise the null
        // lookup result would be handed to the fetcher.
        // NOTE(review): assumes pubmedIds is a Map keyed by short name -- confirm.
        BibliographicReference ref = null;
        if (pubmedIds.get(shortName) != null) {
            ref = fetcher.retrieveByHTTP(pubmedIds.get(shortName));
        }
        if (ref == null) {
            if (this.pubmedIdFilename != null) {
                log.warn("Pubmed ID for " + shortName + " was not found in " + this.pubmedIdFilename);
            }
            // Fall back to the finder.
            ref = finder.locatePrimaryReference(experiment);
            if (ref == null) {
                log.error("No ref for " + experiment);
                failedEe.add(shortName);
                continue;
            }
        }

        // Classify against the reference that is about to be assigned. Comparing with the
        // pubmedIds entry would dereference null whenever the reference came from the finder.
        BibliographicReference existing = experiment.getPrimaryPublication();
        if (existing == null) {
            nullPubCount.add(shortName);
        } else {
            String oldAccession = accessionOf(existing);
            if (oldAccession != null && oldAccession.equals(accessionOf(ref))) {
                samePubCount.add(shortName);
            } else {
                diffPubCount.add(shortName);
            }
        }

        try {
            log.info("Found pubAccession " + accessionOf(ref) + " for " + experiment);
            ref = (BibliographicReference) ph.persist(ref);
            experiment.setPrimaryPublication(ref);
            ees.update(experiment);
        } catch (Exception e) {
            // Keep going with the remaining experiments; route the stack trace through the logger.
            log.error(shortName + " (id=" + experiment.getId() + ") update failed.", e);
        }
    }

    // print statistics
    log.info("\n\n========== Summary ==========");
    log.info("Total number of experiments: " + expressionExperiments.size());
    log.info("Same publication: " + samePubCount.size());
    log.info("Diff publication: " + diffPubCount.size());
    log.info("No initial publication: " + nullPubCount.size());
    log.info("No publications found: " + failedEe.size());
    log.info("\n\n========== Details ==========");
    log.info("Diff publication: " + Arrays.toString(diffPubCount.toArray()));
    log.info("No initial publication: " + Arrays.toString(nullPubCount.toArray()));
    log.info("No publications found: " + Arrays.toString(failedEe.toArray()));
    return null;
}

/**
 * Returns the accession of the given reference's publication accession entry.
 *
 * @param reference the reference to inspect; may be {@code null}
 * @return the accession string, or {@code null} when the reference or its accession entry is
 *         {@code null}
 */
private static String accessionOf(BibliographicReference reference) {
    if (reference == null || reference.getPubAccession() == null) {
        return null;
    }
    return reference.getPubAccession().getAccession();
}
Aggregations