Search in sources :

Example 6 with PubMedXMLFetcher

use of ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher in project Gemma by PavlidisLab.

the class SimpleExpressionDataLoaderServiceImpl method convert.

/**
 * Builds an {@link ExpressionExperiment} from the supplied metadata and data matrix.
 * <p>
 * The experiment receives its name, short name, description and a source note from the metadata, a fresh
 * {@link ExperimentalDesign}, a single quantitation type, and (when the metadata carries a PubMed id) a primary
 * publication retrieved through {@link PubMedXMLFetcher#retrieveByHTTP}. The matrix is then split per array
 * design; the data vectors and bioassays produced for each sub-matrix are pooled and attached to the experiment.
 *
 * @param metaData description of the experiment being imported; must not be null
 * @param matrix   the expression data; must not be null
 * @return the newly assembled experiment
 * @throws IllegalArgumentException if either argument is null
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the array designs
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }
    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());
    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // Leading space keeps the optional second sentence from running into the first one.
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));
    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);
    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }
    QuantitationType quantitationType = this.convertQuantitationType(metaData);
    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);
    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);
    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // Passed to every getSubMatrixForArrayDesign call; its final size is compared to the row count below.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix");
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }
    // sanity: a mismatch is only reported, it does not abort the conversion
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }
    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
Also used : ExperimentalDesign(ubic.gemma.model.expression.experiment.ExperimentalDesign) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BibliographicReference(ubic.gemma.model.common.description.BibliographicReference) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) PubMedXMLFetcher(ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 7 with PubMedXMLFetcher

use of ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher in project Gemma by PavlidisLab.

the class MeshTermFetcherCli method doWork.

/**
 * Runs the command: processes the command line, reads ids from {@code file} and hands them to
 * {@code processChunk} in batches of {@code MeshTermFetcherCli.CHUNK_SIZE}, followed by one final
 * smaller batch for any leftovers.
 *
 * @param args the raw command-line arguments
 * @return the exception reported by command-line processing, or an {@link IOException} caught while
 *         reading or processing ids, or {@code null} when everything completed
 */
@Override
protected Exception doWork(String[] args) {
    Exception commandLineError = super.processCommandLine(args);
    if (commandLineError != null) {
        return commandLineError;
    }

    PubMedXMLFetcher xmlFetcher = new PubMedXMLFetcher();
    try {
        Collection<Integer> idsFromFile = this.readIdsFromFile(file);
        Collection<Integer> batch = new ArrayList<>();
        for (Integer id : idsFromFile) {
            batch.add(id);
            if (batch.size() != MeshTermFetcherCli.CHUNK_SIZE) {
                // Batch not full yet; keep collecting.
                continue;
            }
            this.processChunk(xmlFetcher, batch);
            batch.clear();
        }

        // Flush whatever did not fill a complete batch.
        boolean hasLeftovers = !batch.isEmpty();
        if (hasLeftovers) {
            this.processChunk(xmlFetcher, batch);
        }
        return null;
    } catch (IOException ioFailure) {
        return ioFailure;
    }
}
Also used : ArrayList(java.util.ArrayList) PubMedXMLFetcher(ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher) IOException(java.io.IOException)

Aggregations

PubMedXMLFetcher (ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher)7 BibliographicReference (ubic.gemma.model.common.description.BibliographicReference)6 Test (org.junit.Test)3 IndexerTaskCommand (ubic.gemma.core.tasks.maintenance.IndexerTaskCommand)3 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)3 SearchSettings (ubic.gemma.model.common.search.SearchSettings)3 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 ExpressionExperimentBibRefFinder (ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder)1 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)1 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)1 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)1 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)1 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)1 BioAssaySet (ubic.gemma.model.expression.experiment.BioAssaySet)1 ExperimentalDesign (ubic.gemma.model.expression.experiment.ExperimentalDesign)1 Taxon (ubic.gemma.model.genome.Taxon)1 Persister (ubic.gemma.persistence.persister.Persister)1 ExpressionExperimentService (ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService)1