use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessingServiceImpl method assignSequencesToDesignElements.
/**
 * Attaches each sequence to the design element that carries the matching name.
 * <p>
 * Sequences are indexed by the result of {@code deMangleProbeId} applied to their name; design elements are
 * looked up by their own name, unchanged. If several sequences map to the same key, the last one iterated wins.
 * Design elements without a matching sequence are left untouched and counted for the summary warning.
 *
 * @param designElements elements to receive a biological characteristic
 * @param sequences      candidate sequences, keyed by their de-mangled name
 */
@Override
public void assignSequencesToDesignElements(Collection<CompositeSequence> designElements, Collection<BioSequence> sequences) {
    Map<String, BioSequence> sequencesByProbeId = new HashMap<>();
    for (BioSequence candidate : sequences) {
        String probeId = this.deMangleProbeId(candidate.getName());
        sequencesByProbeId.put(probeId, candidate);
    }

    int unmatched = 0;
    for (CompositeSequence probe : designElements) {
        String probeName = probe.getName();
        if (sequencesByProbeId.containsKey(probeName)) {
            probe.setBiologicalCharacteristic(sequencesByProbeId.get(probeName));
        } else {
            ArrayDesignSequenceProcessingServiceImpl.log.debug("No sequence matches " + probeName);
            unmatched++;
        }
    }

    ArrayDesignSequenceProcessingServiceImpl.log.info(sequences.size() + " sequences processed for " + designElements.size() + " design elements");
    if (unmatched > 0) {
        ArrayDesignSequenceProcessingServiceImpl.log.warn(unmatched + " probes had no matching sequence");
    }
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessingServiceImpl method initializeFetchList.
/**
 * Builds the list of sequences that still need to be fetched for a platform.
 * <p>
 * Each composite sequence falls into exactly one bucket: it has no BioSequence at all (counted, and reported
 * through {@code warnAboutMissingSequence}); it already has sequence text and {@code force} is off (counted and
 * skipped); it has no accession (skipped, logged at debug level); or it is added to the result under its
 * accession.
 *
 * @param arrayDesign platform whose composite sequences are examined
 * @param force       if true, sequence will be replaced even if it is already there.
 * @return map of biosequence accessions to BioSequences (the existing ones)
 */
private Map<String, BioSequence> initializeFetchList(ArrayDesign arrayDesign, boolean force) {
    Map<String, BioSequence> toFetch = new HashMap<>();
    int alreadyHaveSequence = 0;
    int missingBioSequence = 0;
    boolean alreadyWarned = false;

    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        BioSequence existing = probe.getBiologicalCharacteristic();

        if (existing == null) {
            // The helper receives the count as it stood before this probe was tallied.
            alreadyWarned = this.warnAboutMissingSequence(missingBioSequence, alreadyWarned, probe);
            missingBioSequence++;
        } else if (!force && StringUtils.isNotBlank(existing.getSequence())) {
            alreadyHaveSequence++;
        } else {
            String accession = this.getAccession(probe);
            if (accession != null) {
                toFetch.put(accession, existing);
            } else if (ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled()) {
                ArrayDesignSequenceProcessingServiceImpl.log.debug("No accession for " + probe + ": " + existing);
            }
        }
    }

    this.informAboutFetchListResults(arrayDesign, toFetch, alreadyHaveSequence, missingBioSequence);
    return toFetch;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GeoServiceImpl method matchToExistingPlatform.
/**
 * Reconciles a raw GEO platform with a platform already stored under the same short name, if there is one.
 * <p>
 * When no stored platform is found, every converted probe name is mapped to itself in the platform's
 * probe-name map. When one is found, its probes are loaded, registered in the cache {@code c}, and handed to
 * {@code getGemmaIDColumnNameInGEO} together with the GEO ID column name (presumably the step that performs the
 * re-matching announced in the log message; confirm against that method).
 *
 * @param geoConverter   converter used for a throw-away partial conversion of the platform
 * @param rawGEOPlatform platform as parsed from GEO; its probe-name map is filled in here
 * @param c              cache that receives the stored platform and its probe-to-sequence map
 * @throws IllegalStateException if conversion yields nothing although the platform is flagged as using data
 *                               from GEO
 */
private void matchToExistingPlatform(GeoConverter geoConverter, GeoPlatform rawGEOPlatform, ArrayDesignsForExperimentCache c) {
    // This map belongs to the raw platform; populating it is our job.
    Map<String, String> geoToGemmaProbeNames = rawGEOPlatform.getProbeNamesInGemma();

    // Partial conversion only; the converted object is discarded afterwards.
    ArrayDesign converted = (ArrayDesign) geoConverter.convert(rawGEOPlatform);
    if (converted == null) {
        if (rawGEOPlatform.useDataFromGeo()) {
            throw new IllegalStateException("Platform is missing");
        }
        // MPSS, exon arrays
        return;
    }

    // Look up by short name only. The full name can change in GEO, causing trouble.
    ArrayDesign existing = arrayDesignService.findByShortName(converted.getShortName());

    if (existing == null) {
        AbstractGeoService.log.info(rawGEOPlatform + " looks new to Gemma");
        // No mapping needed. NB the converter fills this in already; this is purely defensive.
        for (CompositeSequence probe : converted.getCompositeSequences()) {
            String probeName = probe.getName();
            geoToGemmaProbeNames.put(probeName, probeName);
        }
        return;
    }

    AbstractGeoService.log.info("Platform " + rawGEOPlatform.getGeoAccession() + " exists in Gemma, checking for correct probe names and re-matching if necessary ...");
    String geoIdColumn = this.getGEOIDColumnName(rawGEOPlatform, converted);
    if (geoIdColumn == null) {
        // No problem: this means the design has no elements, so it is actually a placeholder (e.g., MPSS).
        return;
    }

    AbstractGeoService.log.info("Loading probes ...");
    Map<CompositeSequence, BioSequence> probesToSequences = arrayDesignService.getBioSequences(existing);
    c.add(existing, probesToSequences);
    this.getGemmaIDColumnNameInGEO(rawGEOPlatform, probesToSequences, geoIdColumn);
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GeoServiceImpl method addElements.
/**
 * Fills an empty platform with the elements produced by converting its GEO record.
 * <p>
 * The accession of the first external reference returned by iteration is used to generate the GEO data. The
 * composite sequences of the first converted array design are re-pointed at {@code targetPlatform}, their
 * biological characteristics are persisted, and the platform is updated and its report regenerated.
 *
 * @param targetPlatform platform that currently has no composite sequences
 * @return the same {@code targetPlatform} instance, after the elements were added and the update was issued
 * @throws IllegalArgumentException         if {@code targetPlatform} already has composite sequences
 * @throws IllegalStateException            if nothing was generated for the accession
 * @throws java.util.NoSuchElementException propagated from {@code iterator().next()} if the platform has no
 *                                          external references or the conversion result is empty
 */
@Override
public ArrayDesign addElements(ArrayDesign targetPlatform) {
    if (!targetPlatform.getCompositeSequences().isEmpty()) {
        throw new IllegalArgumentException("Only call this if you are filling in an empty platform");
    }

    String geoAccession = targetPlatform.getExternalReferences().iterator().next().getAccession();
    Collection<? extends GeoData> platforms = geoDomainObjectGenerator.generate(geoAccession);
    if (platforms.isEmpty()) {
        throw new IllegalStateException("No GEO data was generated for " + geoAccession + "; cannot add elements to " + targetPlatform);
    }

    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");

    // The generator was already dereferenced by generate() above, so it cannot be null at this point; the
    // former "create a new generator if null" branch was unreachable and has been dropped.
    // NOTE(review): initialize() and setProcessPlatformsOnly(true) run AFTER generate(). The original order is
    // preserved here; confirm whether the generator was meant to be configured before generating.
    this.geoDomainObjectGenerator.initialize();
    geoDomainObjectGenerator.setProcessPlatformsOnly(true);
    geoConverter.setForceConvertElements(true);

    Collection<Object> arrayDesigns = geoConverter.convert(platforms);
    Collection<CompositeSequence> els = ((ArrayDesign) arrayDesigns.iterator().next()).getCompositeSequences();
    for (CompositeSequence cs : els) {
        cs.setArrayDesign(targetPlatform);
        // Replace the characteristic with whatever instance the persister hands back.
        cs.setBiologicalCharacteristic((BioSequence) persisterHelper.persist(cs.getBiologicalCharacteristic()));
    }

    AbstractGeoService.log.info("Adding " + els.size() + " elements to " + targetPlatform);
    targetPlatform.getCompositeSequences().addAll(els);
    arrayDesignService.update(targetPlatform);
    this.arrayDesignReportService.generateArrayDesignReport(targetPlatform.getId());
    return targetPlatform;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceImpl method getSubMatrixForArrayDesign.
/**
 * Extracts the rows of {@code matrix} that belong to {@code design} and have not been claimed already.
 * <p>
 * A row is taken when its name equals the name of one of the design's composite sequences and is not yet in
 * {@code usedDesignElements}; every taken row name is added to {@code usedDesignElements}, so the caller's
 * collection is modified. Rows keep the order in which {@code matrix.getRowNames()} returns them, and all
 * columns of the input matrix are retained.
 *
 * @param matrix             source data, rows named by design element
 * @param usedDesignElements row names already assigned (by this or earlier calls); updated in place
 * @param design             platform whose element names select the rows
 * @return the sub-matrix for this design, or {@code null} if no row matched it
 * @throws IllegalArgumentException if {@code usedDesignElements} is still empty after scanning the matrix
 */
@Override
public DoubleMatrix<String, String> getSubMatrixForArrayDesign(DoubleMatrix<String, String> matrix, Collection<Object> usedDesignElements, ArrayDesign design) {
    List<String> matchedRowNames = new ArrayList<>();
    List<String> allColumnNames = new ArrayList<>(matrix.getColNames());
    List<double[]> matchedRows = new ArrayList<>();

    Collection<Object> namesOnPlatform = new HashSet<>();
    for (CompositeSequence element : design.getCompositeSequences()) {
        namesOnPlatform.add(element.getName());
    }

    for (String rowName : matrix.getRowNames()) {
        /*
         * A design element may be used only once: if two array designs match a given row name, the row simply
         * goes to whichever design claims it first.
         */
        if (!namesOnPlatform.contains(rowName) || usedDesignElements.contains(rowName)) {
            continue;
        }
        int rowIndex = matrix.getRowIndexByName(rowName);
        matchedRows.add(matrix.getRow(rowIndex));
        usedDesignElements.add(rowName);
        matchedRowNames.add(rowName);
    }

    if (usedDesignElements.isEmpty()) {
        throw new IllegalArgumentException("No design elements matched?");
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Found " + matchedRows.size() + " data rows for " + design);
    if (matchedRows.isEmpty()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("A platform was entered ( " + design + " ) for which there are no matching rows in the data");
        return null;
    }

    // Exactly-sized outer array: toArray fills it in place with the collected row references.
    double[][] rowData = matchedRows.toArray(new double[matchedRows.size()][]);
    DoubleMatrix<String, String> result = DoubleMatrixFactory.fastrow(rowData);
    result.setRowNames(matchedRowNames);
    result.setColumnNames(allColumnNames);
    return result;
}
Aggregations