use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.
the class GeoBrowserTest method testGetGeoRecordsBySearchTerm.
/**
 * Queries GEO with the search term {@code Homo+sapiens[orgn]} and verifies that at least one record comes back
 * and that every returned record lists "Homo sapiens" among its organisms.
 * <p>
 * The test returns without failing when the IOException message says that GEO returned an error; any other
 * IOException is rethrown.
 *
 * @throws Exception if the search fails for a reason other than GEO reporting an error
 */
@Test
public void testGetGeoRecordsBySearchTerm() throws Exception {
    GeoBrowser b = new GeoBrowser();
    try {
        Collection<GeoRecord> res = b.getGeoRecordsBySearchTerm("Homo+sapiens[orgn]", 10, 10);
        // Check that the search has returned at least one record
        assertTrue(!res.isEmpty());
        // Print out accession numbers etc.; check that the records returned match the search term
        for (GeoRecord record : res) {
            System.out.println("Accession: " + record.getGeoAccession());
            System.out.println("Title : " + record.getTitle());
            System.out.println("Number of samples: " + record.getNumSamples());
            System.out.println("Date: " + record.getReleaseDate());
            assertTrue(record.getOrganisms().contains("Homo sapiens"));
        }
    } catch (IOException e) {
        // getMessage() may legitimately be null; guard it so that the original exception is rethrown
        // instead of being masked by a NullPointerException raised inside this handler.
        String message = e.getMessage();
        if (message != null && message.contains("GEO returned an error")) {
            GeoBrowserTest.log.warn("GEO returned an error, skipping test.");
            return;
        }
        throw e;
    }
}
use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.
the class GeoBrowserTest method testGetRecentGeoRecords.
/**
 * Fetches recent GEO records through {@code GeoBrowser.getRecentGeoRecords(10, 10)} and verifies that the result
 * is not empty.
 * <p>
 * The test returns without failing when the IOException message says that GEO returned an error; any other
 * IOException is rethrown.
 *
 * @throws Exception if the fetch fails for a reason other than GEO reporting an error
 */
@Test
public void testGetRecentGeoRecords() throws Exception {
    GeoBrowser b = new GeoBrowser();
    try {
        Collection<GeoRecord> res = b.getRecentGeoRecords(10, 10);
        assertTrue(!res.isEmpty());
    } catch (IOException e) {
        // getMessage() may legitimately be null; guard it so that the original exception is rethrown
        // instead of being masked by a NullPointerException raised inside this handler.
        String message = e.getMessage();
        if (message != null && message.contains("GEO returned an error")) {
            GeoBrowserTest.log.warn("GEO returned an error, skipping test.");
            return;
        }
        throw e;
    }
}
use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.
the class GeoBrowserServiceTest method testGetRecentRecords.
/**
 * Checks that asking for the details of a GEO record increments its "previous clicks" counter.
 * <p>
 * The test returns without asserting when no records come back, when the first record of the second fetch is
 * not the one whose details were requested, or when the failure looks like an NCBI/network outage (see
 * {@link #isNcbiOutage(Throwable)}).
 *
 * @throws Exception any failure that is not recognised as an NCBI/network outage
 */
@Test
public final void testGetRecentRecords() throws Exception {
    try {
        // I changed the skip because the very newest records can cause a problem with fetching details.
        List<GeoRecord> recentGeoRecords = gbs.getRecentGeoRecords(100, 10);
        if (recentGeoRecords.isEmpty()) {
            log.warn("Skipping test: no GEO records returned, check test settings");
            return;
        }
        GeoRecord rec = recentGeoRecords.iterator().next();
        int oldCount = rec.getPreviousClicks();
        String firstAccession = rec.getGeoAccession();
        // this should cause the increment.
        gbs.getDetails(firstAccession);
        // NOTE(review): this second fetch uses 11 as its first argument while the first fetch uses 100; if that
        // argument is an offset, the accession check below will rarely match and the assertion is skipped.
        // Confirm whether both calls are meant to use the same value.
        recentGeoRecords = gbs.getRecentGeoRecords(11, 10);
        /*
         * Do this check in case it gets filtered out.
         */
        if (recentGeoRecords.isEmpty()) {
            return;
        }
        rec = recentGeoRecords.iterator().next();
        if (!rec.getGeoAccession().equals(firstAccession)) {
            return;
        }
        int newCount = rec.getPreviousClicks();
        assertEquals(oldCount + 1, newCount);
    } catch (Exception e) {
        if (isNcbiOutage(e)) {
            log.warn("NCBI returned error, skipping test");
            return;
        }
        throw e;
    }
}

/**
 * Decides whether a failure should be treated as an NCBI/network outage rather than a test failure.
 * Messages are null-checked because {@link Throwable#getMessage()} may return null.
 *
 * @param e the failure to classify
 * @return true if the message mentions "500", "502", "503" or "GEO returned an error", or if the cause is an
 *         {@link UnknownHostException} or has a message mentioning "500", "502" or "503"
 */
private static boolean isNcbiOutage(Throwable e) {
    String message = e.getMessage();
    if (message != null && (message.contains("500") || message.contains("502") || message.contains("503")
            || message.contains("GEO returned an error"))) {
        return true;
    }
    Throwable cause = e.getCause();
    if (cause == null) {
        return false;
    }
    if (cause instanceof UnknownHostException) {
        return true;
    }
    String causeMessage = cause.getMessage();
    return causeMessage != null
            && (causeMessage.contains("500") || causeMessage.contains("502") || causeMessage.contains("503"));
}
use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.
the class GeoBrowserServiceImpl method filterGeoRecords.
/**
 * Removes records that should not be presented and annotates the remaining ones with locally known information.
 * <p>
 * A record is removed when it has fewer than {@code MIN_SAMPLES} samples, or when any of its organisms cannot be
 * resolved to a taxon by common name or scientific name. Records without organisms are kept untouched. For every
 * other record, the ids of matching expression experiments are added to its corresponding experiments, and its
 * previous-click count and usability flag are set from {@code localInfo} (defaulting to 0 and usable).
 *
 * @param records the records to filter; this list is modified in place
 * @return the same list instance, with the rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;
    Collection<GeoRecord> toRemove = new HashSet<>();

    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // Already rejected: skip the taxon and experiment lookups, and avoid mutating the record after it
            // has been placed in the hash set.
            continue;
        }

        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }

        int i = 0;
        for (String organism : organisms) {
            Taxon taxon = taxonService.findByCommonName(organism);
            if (taxon == null) {
                taxon = taxonService.findByScientificName(organism);
            }
            if (taxon == null) {
                // One unknown organism is enough to reject the whole record.
                toRemove.add(record);
                continue rec;
            }

            // With several organisms the lookup accession is suffixed with the organism's position.
            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + i;
            }

            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            for (ExpressionExperiment expressionExperiment : expressionExperimentService.findByAccession(de)) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }

            // Set on every iteration, so for multi-organism records the last organism's values win.
            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }

    records.removeAll(toRemove);
    return records;
}
use of ubic.gemma.core.loader.expression.geo.model.GeoRecord in project Gemma by PavlidisLab.
the class GeoBrowser method getRecentGeoRecords.
/**
 * Retrieves and parses tab delimited file from GEO. File contains pageSize GEO records starting from startPage.
 * <p>
 * The first line of the response is read as the column header; each following line becomes one
 * {@link GeoRecord} populated from the "Accession", "Title", "Sample Count", "Contact", "Taxonomy",
 * "Release Date" and "Series Type" columns. An empty response yields an empty list.
 *
 * @param startPage start page, must not be negative
 * @param pageSize page size, must not be negative
 * @return list of GeoRecords, possibly empty
 * @throws IOException if there is a problem while manipulating the file, or if an expected column is missing
 *         from the header
 * @throws ParseException if there is a parsing problem
 * @throws IllegalArgumentException if either argument is negative
 */
public List<GeoRecord> getRecentGeoRecords(int startPage, int pageSize) throws IOException, ParseException {
    if (startPage < 0 || pageSize < 0) {
        throw new IllegalArgumentException("startPage and pageSize must not be negative");
    }

    String spec = GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize;
    URL url;
    try {
        url = new URL(spec);
    } catch (MalformedURLException e) {
        throw new RuntimeException("Invalid URL: " + spec, e);
    }

    List<GeoRecord> records = new ArrayList<>();
    URLConnection conn = url.openConnection();
    conn.connect();
    // NOTE(review): decoding as UTF-8 rather than the platform default charset; this assumes GEO serves the
    // export as UTF-8 — confirm.
    try (InputStream is = conn.getInputStream();
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
        // We are getting a tab delimited file.
        // Read columns headers.
        String headerLine = br.readLine();
        if (headerLine == null) {
            // Empty response: there is nothing to parse.
            GeoBrowser.log.warn("No records obtained");
            return records;
        }
        String[] headers = StringUtil.csvSplit(headerLine);

        // Map column names to their indices.
        Map<String, Integer> columnNameToIndex = new HashMap<>();
        for (int i = 0; i < headers.length; i++) {
            columnNameToIndex.put(headers[i], i);
        }

        // Resolve every column once, up front, so that a missing column is reported clearly instead of
        // surfacing as a NullPointerException while unboxing inside the loop.
        int accessionCol = requireGeoColumn(columnNameToIndex, "Accession");
        int titleCol = requireGeoColumn(columnNameToIndex, "Title");
        int sampleCountCol = requireGeoColumn(columnNameToIndex, "Sample Count");
        int contactCol = requireGeoColumn(columnNameToIndex, "Contact");
        int taxonomyCol = requireGeoColumn(columnNameToIndex, "Taxonomy");
        int releaseDateCol = requireGeoColumn(columnNameToIndex, "Release Date");
        int seriesTypeCol = requireGeoColumn(columnNameToIndex, "Series Type");

        // Read the rest of the file.
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = StringUtil.csvSplit(line);
            GeoRecord geoRecord = new GeoRecord();
            geoRecord.setGeoAccession(fields[accessionCol]);
            geoRecord.setTitle(
                    StringUtils.strip(fields[titleCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "")));

            String sampleCountS = fields[sampleCountCol];
            if (StringUtils.isNotBlank(sampleCountS)) {
                try {
                    geoRecord.setNumSamples(Integer.parseInt(sampleCountS));
                } catch (NumberFormatException e) {
                    // Keep the cause so the stack trace shows where parsing failed.
                    throw new RuntimeException("Could not parse sample count: " + sampleCountS, e);
                }
            } else {
                GeoBrowser.log.warn("No sample count for " + geoRecord.getGeoAccession());
            }

            geoRecord.setContactName(fields[contactCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""));

            // Several organisms are separated by ';' within the taxonomy field.
            String[] taxons = fields[taxonomyCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "").split(";");
            geoRecord.getOrganisms().addAll(Arrays.asList(taxons));

            Date date = DateUtils.parseDate(
                    fields[releaseDateCol].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""), DATE_FORMATS);
            geoRecord.setReleaseDate(date);

            geoRecord.setSeriesType(fields[seriesTypeCol]);
            records.add(geoRecord);
        }
    }

    if (records.isEmpty()) {
        GeoBrowser.log.warn("No records obtained");
    }
    return records;
}

/**
 * Looks up the index of a column that must be present in the header of the GEO browse export.
 *
 * @param columnNameToIndex header names mapped to their position
 * @param column the required column name
 * @return the position of the column
 * @throws IOException if the header does not contain the column
 */
private static int requireGeoColumn(Map<String, Integer> columnNameToIndex, String column) throws IOException {
    Integer index = columnNameToIndex.get(column);
    if (index == null) {
        throw new IOException("Unexpected response from GEO: missing column '" + column + "', found "
                + columnNameToIndex.keySet());
    }
    return index;
}
Aggregations