Use of ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher in project Gemma by PavlidisLab.
Class SimpleExpressionDataLoaderServiceImpl, method convert.
/**
 * Builds a new, in-memory {@link ExpressionExperiment} from the supplied metadata and data matrix.
 * <p>
 * The experiment receives its name, short name, description and source text from the metadata, a fresh
 * {@link ExperimentalDesign}, a single quantitation type, and (when a PubMed id is present) a primary
 * publication fetched through {@link PubMedXMLFetcher}. For each array design derived from the metadata, a
 * sub-matrix is extracted and converted to a bioassay dimension plus raw data vectors; the vectors and
 * bioassays of all designs are pooled and attached to the experiment.
 *
 * @param metaData description of the experiment to create; must not be null
 * @param matrix   expression values to convert; must not be null
 * @return the populated experiment
 * @throws IllegalArgumentException if either argument is null
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the array designs
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }

    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());

    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // The leading space keeps the optional second sentence from running into the first one.
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));

    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);

    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }

    QuantitationType quantitationType = this.convertQuantitationType(metaData);

    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);

    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);

    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // NOTE(review): presumably filled by getSubMatrixForArrayDesign with the rows it matched; confirm.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix for " + design);
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }

    // sanity: warn (but do not fail) when the number of used design elements differs from the row count
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }

    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
Use of ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher in project Gemma by PavlidisLab.
Class MeshTermFetcherCli, method doWork.
/**
 * Runs the command: parses the command line, reads ids from the configured file and hands them to
 * {@code processChunk} in batches of {@code CHUNK_SIZE}, followed by one final partial batch if any ids remain.
 *
 * @param args raw command-line arguments, forwarded to {@code processCommandLine}
 * @return the exception reported by command-line processing, the {@link IOException} caught while reading or
 *         processing the ids, or {@code null} when everything completed
 */
@Override
protected Exception doWork(String[] args) {
    Exception commandLineError = super.processCommandLine(args);
    if (commandLineError != null) {
        return commandLineError;
    }

    PubMedXMLFetcher pubMedFetcher = new PubMedXMLFetcher();
    try {
        Collection<Integer> allIds = this.readIdsFromFile(file);
        // One buffer is reused for every batch; it is emptied after each hand-off.
        Collection<Integer> batch = new ArrayList<>();
        for (Integer id : allIds) {
            batch.add(id);
            if (batch.size() != MeshTermFetcherCli.CHUNK_SIZE) {
                continue;
            }
            this.processChunk(pubMedFetcher, batch);
            batch.clear();
        }
        // Flush whatever is left over after the last full batch.
        if (!batch.isEmpty()) {
            this.processChunk(pubMedFetcher, batch);
        }
        return null;
    } catch (IOException ioFailure) {
        return ioFailure;
    }
}
Aggregations