Search in sources :

Example 1 with GeoRecord

use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.

the class GeoBrowserTest method testGetGeoRecordsBySearchTerm.

/**
 * Queries GEO through {@link GeoBrowser#getGeoRecordsBySearchTerm} with the term
 * {@code "Homo+sapiens[orgn]"} and checks that at least one record comes back and that every
 * returned record lists "Homo sapiens" among its organisms.
 * <p>
 * When the call fails with an {@link IOException} whose message contains "GEO returned an error",
 * the test logs a warning and returns without failing; any other I/O failure is rethrown.
 *
 * @throws Exception if the lookup fails for a reason other than a reported GEO error
 */
@Test
public void testGetGeoRecordsBySearchTerm() throws Exception {
    GeoBrowser b = new GeoBrowser();
    try {
        Collection<GeoRecord> res = b.getGeoRecordsBySearchTerm("Homo+sapiens[orgn]", 10, 10);
        // Check that the search has returned at least one record
        assertTrue(res.size() > 0);
        // Print out accession numbers etc.; check that the records returned match the search term
        for (GeoRecord record : res) {
            System.out.println("Accession: " + record.getGeoAccession());
            System.out.println("Title : " + record.getTitle());
            System.out.println("Number of samples: " + record.getNumSamples());
            System.out.println("Date: " + record.getReleaseDate());
            assertTrue(record.getOrganisms().contains("Homo sapiens"));
        }
    } catch (IOException e) {
        // getMessage() may be null; guard so the original exception is rethrown rather than
        // being replaced by a NullPointerException raised from this handler.
        String message = e.getMessage();
        if (message != null && message.contains("GEO returned an error")) {
            GeoBrowserTest.log.warn("GEO returned an error, skipping test.");
            return;
        }
        throw e;
    }
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) IOException(java.io.IOException) GeoBrowser(ubic.gemma.core.loader.expression.geo.service.GeoBrowser) Test(org.junit.Test)

Example 2 with GeoRecord

use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.

the class GeoBrowserTest method testGetRecentGeoRecords.

/**
 * Calls {@link GeoBrowser#getRecentGeoRecords(int, int)} with a start page of 10 and a page size
 * of 10 and checks that at least one record is returned.
 * <p>
 * When the call fails with an {@link IOException} whose message contains "GEO returned an error",
 * the test logs a warning and returns without failing; any other I/O failure is rethrown.
 *
 * @throws Exception if the lookup fails for a reason other than a reported GEO error
 */
@Test
public void testGetRecentGeoRecords() throws Exception {
    GeoBrowser b = new GeoBrowser();
    try {
        Collection<GeoRecord> res = b.getRecentGeoRecords(10, 10);
        assertTrue(res.size() > 0);
    } catch (IOException e) {
        // getMessage() may be null; guard so the original exception is rethrown rather than
        // being replaced by a NullPointerException raised from this handler.
        String message = e.getMessage();
        if (message != null && message.contains("GEO returned an error")) {
            GeoBrowserTest.log.warn("GEO returned an error, skipping test.");
            return;
        }
        throw e;
    }
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) IOException(java.io.IOException) GeoBrowser(ubic.gemma.core.loader.expression.geo.service.GeoBrowser) Test(org.junit.Test)

Example 3 with GeoRecord

use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.

the class GeoBrowserServiceTest method testGetRecentRecords.

/**
 * Checks that requesting the details of a GEO record increments its "previous clicks" counter.
 * <p>
 * The test fetches a page of recent records, remembers the click count of the first one, calls
 * {@code gbs.getDetails} for its accession, fetches recent records again and expects the count to
 * have gone up by one. It returns without asserting when no records come back or when the first
 * record of the second fetch is a different accession, and it is skipped (with a warning) when
 * the failure looks like a remote-service problem: a message containing "500", "502", "503" or
 * "GEO returned an error", or a cause that is an {@link UnknownHostException} or whose message
 * contains one of those status codes.
 *
 * @throws Exception for any failure that is not recognised as a remote-service problem
 */
@Test
public final void testGetRecentRecords() throws Exception {
    try {
        // I changed the skip because the very newest records can cause a problem with fetching details.
        List<GeoRecord> recentGeoRecords = gbs.getRecentGeoRecords(100, 10);
        if (recentGeoRecords.isEmpty()) {
            log.warn("Skipping test: no GEO records returned, check test settings");
            return;
        }
        GeoRecord rec = recentGeoRecords.iterator().next();
        int oldCount = rec.getPreviousClicks();
        String firstAccession = rec.getGeoAccession();
        // this should cause the increment.
        gbs.getDetails(firstAccession);
        recentGeoRecords = gbs.getRecentGeoRecords(11, 10);
        /*
         * Do this check in case it gets filtered out.
         */
        if (recentGeoRecords.isEmpty()) {
            return;
        }
        rec = recentGeoRecords.iterator().next();
        if (!rec.getGeoAccession().equals(firstAccession)) {
            return;
        }
        int newCount = rec.getPreviousClicks();
        assertEquals(oldCount + 1, newCount);
    } catch (Exception e) {
        // Either message may be null (e.g. a bare NullPointerException); guard every lookup so
        // the original failure is rethrown instead of a NullPointerException from this handler.
        String message = e.getMessage();
        Throwable cause = e.getCause();
        String causeMessage = cause == null ? null : cause.getMessage();

        boolean reportedByMessage = message != null
                && (message.contains("500") || message.contains("502") || message.contains("503")
                        || message.contains("GEO returned an error"));
        // instanceof is false for a null cause, so no separate null check is needed.
        boolean reportedByCause = cause instanceof UnknownHostException
                || (causeMessage != null && (causeMessage.contains("500") || causeMessage.contains("502")
                        || causeMessage.contains("503")));

        if (reportedByMessage || reportedByCause) {
            log.warn("NCBI returned error, skipping test");
            return;
        }
        throw e;
    }
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) UnknownHostException(java.net.UnknownHostException) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest) Test(org.junit.Test)

Example 4 with GeoRecord

use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.

the class GeoBrowserServiceImpl method filterGeoRecords.

/**
 * Removes unsuitable records from the given list (in place) and annotates the ones that remain.
 * <p>
 * A record is removed when it has fewer than {@code MIN_SAMPLES} samples, or when any of its
 * organisms cannot be resolved by {@code taxonService} via common name or scientific name. Records
 * with no organisms are kept untouched. For each organism of a kept record the method looks up
 * experiments by GEO accession (suffixed with {@code "." + index} when the record has more than
 * one organism), adds their ids to the record's corresponding experiments, and copies the
 * previous-click count and usable flag from {@code localInfo} (defaulting to 0 and {@code true}).
 * With several organisms, the click count and usable flag end up reflecting the last organism
 * processed.
 *
 * @param records records to filter; this list is modified and returned
 * @return the same list instance, with rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;
    Collection<GeoRecord> toRemove = new HashSet<>();
    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // Already rejected: skip the taxon/experiment lookups, and avoid mutating a record
            // after it has been placed in the hash set.
            continue;
        }
        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }
        // Position of the current organism; used to build the per-organism accession suffix.
        int i = 0;
        for (String string : organisms) {
            Taxon t = taxonService.findByCommonName(string);
            if (t == null) {
                t = taxonService.findByScientificName(string);
                if (t == null) {
                    // Unknown organism: reject the whole record and move on to the next one.
                    toRemove.add(record);
                    continue rec;
                }
            }
            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + i;
            }
            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            Collection<ExpressionExperiment> ee = expressionExperimentService.findByAccession(de);
            for (ExpressionExperiment expressionExperiment : ee) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }
            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }
    records.removeAll(toRemove);
    return records;
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) Taxon(ubic.gemma.model.genome.Taxon) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 5 with GeoRecord

use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.

the class GeoBrowser method getRecentGeoRecords.

/**
 * Retrieves and parses the delimited record listing from GEO. The request is built from
 * {@code GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize}; the first line of the response
 * is read as the column header and every following line becomes one {@link GeoRecord}.
 * <p>
 * NOTE(review): earlier comments describe the payload as tab delimited, but it is split with
 * {@code StringUtil.csvSplit} - confirm which format GEO actually returns.
 *
 * @param startPage start page, must not be negative
 * @param pageSize  page size, must not be negative
 * @return list of GeoRecords, empty if the response contained no data
 * @throws IllegalArgumentException if either argument is negative
 * @throws IOException              if there is a problem while reading the response, or the header
 *                                  lacks one of the expected columns
 * @throws ParseException           if a release date cannot be parsed
 */
public List<GeoRecord> getRecentGeoRecords(int startPage, int pageSize) throws IOException, ParseException {
    if (startPage < 0 || pageSize < 0) {
        throw new IllegalArgumentException("Values must not be negative");
    }
    List<GeoRecord> records = new ArrayList<>();
    String spec = GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize;
    URL url;
    try {
        url = new URL(spec);
    } catch (MalformedURLException e) {
        throw new RuntimeException("Invalid URL: " + spec, e);
    }
    URLConnection conn = url.openConnection();
    conn.connect();
    // Decode explicitly instead of relying on the platform default charset.
    // Assumes GEO serves UTF-8 - TODO confirm.
    try (InputStream is = conn.getInputStream();
        BufferedReader br = new BufferedReader(new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
        // Read columns headers.
        String headerLine = br.readLine();
        if (headerLine == null) {
            // Empty response body: nothing to parse.
            GeoBrowser.log.warn("No records obtained");
            return records;
        }
        String[] headers = StringUtil.csvSplit(headerLine);
        // Map column names to their indices (handy later).
        Map<String, Integer> columnNameToIndex = new HashMap<>();
        for (int i = 0; i < headers.length; i++) {
            columnNameToIndex.put(headers[i], i);
        }
        // Fail with a readable message (rather than an unboxing NullPointerException) when the
        // response does not have the layout this parser relies on.
        String[] requiredColumns = { "Accession", "Title", "Sample Count", "Contact", "Taxonomy", "Release Date", "Series Type" };
        for (String column : requiredColumns) {
            if (!columnNameToIndex.containsKey(column)) {
                throw new IOException("Unexpected response from GEO: missing column '" + column + "' in header: " + headerLine);
            }
        }
        // Resolve the column positions once instead of once per row.
        int accessionCol = columnNameToIndex.get("Accession");
        int titleCol = columnNameToIndex.get("Title");
        int sampleCountCol = columnNameToIndex.get("Sample Count");
        int contactCol = columnNameToIndex.get("Contact");
        int taxonomyCol = columnNameToIndex.get("Taxonomy");
        int releaseDateCol = columnNameToIndex.get("Release Date");
        int seriesTypeCol = columnNameToIndex.get("Series Type");
        // Read the rest of the file.
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = StringUtil.csvSplit(line);
            GeoRecord geoRecord = new GeoRecord();
            geoRecord.setGeoAccession(fields[accessionCol]);
            geoRecord.setTitle(StringUtils.strip(fields[titleCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "")));
            String sampleCountS = fields[sampleCountCol];
            if (StringUtils.isNotBlank(sampleCountS)) {
                try {
                    geoRecord.setNumSamples(Integer.parseInt(sampleCountS));
                } catch (NumberFormatException e) {
                    // Keep the original exception as the cause so the stack trace is not lost.
                    throw new RuntimeException("Could not parse sample count: " + sampleCountS, e);
                }
            } else {
                GeoBrowser.log.warn("No sample count for " + geoRecord.getGeoAccession());
            }
            geoRecord.setContactName(fields[contactCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""));
            // Multiple organisms are separated by ';' within the taxonomy field.
            String[] taxons = fields[taxonomyCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "").split(";");
            geoRecord.getOrganisms().addAll(Arrays.asList(taxons));
            Date date = DateUtils.parseDate(fields[releaseDateCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""), DATE_FORMATS);
            geoRecord.setReleaseDate(date);
            geoRecord.setSeriesType(fields[seriesTypeCol]);
            records.add(geoRecord);
        }
    }
    if (records.isEmpty()) {
        GeoBrowser.log.warn("No records obtained");
    }
    return records;
}
Also used : MalformedURLException(java.net.MalformedURLException) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) URL(java.net.URL) URLConnection(java.net.URLConnection) GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) BufferedReader(java.io.BufferedReader)

Aggregations

GeoRecord (ubic.gemma.core.loader.expression.geo.model.GeoRecord)8 IOException (java.io.IOException)4 Test (org.junit.Test)3 InputStream (java.io.InputStream)2 URL (java.net.URL)2 URLConnection (java.net.URLConnection)2 ParseException (java.text.ParseException)2 GeoBrowser (ubic.gemma.core.loader.expression.geo.service.GeoBrowser)2 BufferedReader (java.io.BufferedReader)1 InputStreamReader (java.io.InputStreamReader)1 MalformedURLException (java.net.MalformedURLException)1 UnknownHostException (java.net.UnknownHostException)1 HashSet (java.util.HashSet)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 Document (org.w3c.dom.Document)1 Element (org.w3c.dom.Element)1 Node (org.w3c.dom.Node)1 NodeList (org.w3c.dom.NodeList)1 SAXException (org.xml.sax.SAXException)1