use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class LinkAnalysisCli method doWork.
/**
 * Entry point for the link analysis command line tool.
 * <p>
 * Depending on the configured options this either initializes links from old data, updates node degrees,
 * analyzes vectors read from a data file, or processes each selected data set in turn.
 *
 * @param args command line arguments, handed to {@code processCommandLine}
 * @return {@code null} when the run completes; otherwise the exception describing why it could not (command line
 *         processing failure, unknown array design, or a failure while reading/parsing the data file)
 * @throws UnsupportedOperationException if a selected data set is not an {@code ExpressionExperiment}
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    if (initializeFromOldData) {
        AbstractCLI.log.info("Initializing links from old data for " + this.taxon);
        LinkAnalysisPersister s = this.getBean(LinkAnalysisPersister.class);
        s.initializeLinksFromOldData(this.taxon);
        return null;
    } else if (updateNodeDegree) {
        // we waste some time here getting the experiments.
        this.loadTaxon();
        this.getBean(CoexpressionService.class).updateNodeDegrees(this.taxon);
        return null;
    }
    this.linkAnalysisService = this.getBean(LinkAnalysisService.class);
    if (this.dataFileName != null) {
        /*
         * Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in later
         * code)
         */
        ArrayDesignService arrayDesignService = this.getBean(ArrayDesignService.class);
        ArrayDesign arrayDesign = arrayDesignService.findByShortName(this.linkAnalysisConfig.getArrayName());
        if (arrayDesign == null) {
            return new IllegalArgumentException("No such array design " + this.linkAnalysisConfig.getArrayName());
        }
        this.loadTaxon();
        arrayDesign = arrayDesignService.thawLite(arrayDesign);
        Collection<ProcessedExpressionDataVector> dataVectors = new HashSet<>();
        // Index the platform's probes by name so matrix rows can be matched to them.
        Map<String, CompositeSequence> csMap = new HashMap<>();
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            csMap.put(cs.getName(), cs);
        }
        QuantitationType qtype = this.makeQuantitationType();
        SimpleExpressionDataLoaderService simpleExpressionDataLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
        ByteArrayConverter bArrayConverter = new ByteArrayConverter();
        try (InputStream data = new FileInputStream(new File(this.dataFileName))) {
            DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse(data);
            BioAssayDimension bad = this.makeBioAssayDimension(arrayDesign, matrix);
            for (int i = 0; i < matrix.rows(); i++) {
                CompositeSequence cs = csMap.get(matrix.getRowName(i));
                if (cs == null) {
                    // Row name is not a probe on this platform: skip it before doing any conversion work.
                    continue;
                }
                byte[] bData = bArrayConverter.doubleArrayToBytes(matrix.getRow(i));
                ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
                vector.setData(bData);
                vector.setDesignElement(cs);
                vector.setBioAssayDimension(bad);
                vector.setQuantitationType(qtype);
                dataVectors.add(vector);
            }
            AbstractCLI.log.info("Read " + dataVectors.size() + " data vectors");
        } catch (Exception e) {
            // Failures are reported to the caller through the return value rather than thrown.
            return e;
        }
        this.linkAnalysisService.processVectors(this.taxon, dataVectors, filterConfig, linkAnalysisConfig);
    } else {
        /*
         * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
         */
        List<BioAssaySet> sees = new ArrayList<>(expressionExperiments);
        if (expressionExperiments.size() > 1) {
            AbstractCLI.log.info("Sorting data sets by number of samples, doing large data sets first.");
            Collection<ExpressionExperimentValueObject> vos = eeService.loadValueObjects(EntityUtils.getIds(expressionExperiments), true);
            final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap(vos);
            Collections.sort(sees, new Comparator<BioAssaySet>() {
                @Override
                public int compare(BioAssaySet o1, BioAssaySet o2) {
                    ExpressionExperimentValueObject e1 = idMap.get(o1.getId());
                    ExpressionExperimentValueObject e2 = idMap.get(o2.getId());
                    // Name the data set whose value object is actually missing.
                    assert e1 != null : "No valueobject: " + o1;
                    assert e2 != null : "No valueobject: " + o2;
                    // Operands reversed to sort in descending order of sample count.
                    return e2.getBioMaterialCount().compareTo(e1.getBioMaterialCount());
                }
            });
        }
        for (BioAssaySet ee : sees) {
            if (ee instanceof ExpressionExperiment) {
                this.processExperiment((ExpressionExperiment) ee);
            } else {
                throw new UnsupportedOperationException("Can't handle non-EE BioAssaySets yet");
            }
        }
        this.summarizeProcessing();
    }
    return null;
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class ExpressionDataBooleanMatrix method createMatrix.
/**
 * Builds the boolean matrix and populates it from the given vectors.
 * <p>
 * Every cell starts out as {@code false}; columns are named by their index. Each vector is placed in the row that
 * {@code rowElementMap} assigns to its design element, and rows are finally named after those design elements.
 *
 * @param vectors the data vectors to load; one matrix row is allocated per vector
 * @param maxSize second dimension passed to the matrix constructor
 * @return the populated matrix
 * @throws IllegalStateException if a vector's value count differs from the number of bioassays in its dimension
 */
private ObjectMatrixImpl<CompositeSequence, Integer, Boolean> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    ObjectMatrixImpl<CompositeSequence, Integer, Boolean> matrix = new ObjectMatrixImpl<>(vectors.size(), maxSize);

    // Start from an all-false matrix.
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, Boolean.FALSE);
        }
    }

    // Columns are simply named by position.
    for (int col = 0; col < matrix.columns(); col++) {
        matrix.addColumnName(col);
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    // Remembers which design element ended up in which row, for naming the rows afterwards.
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();

    for (DesignElementDataVector vec : vectors) {
        BioAssayDimension dim = vec.getBioAssayDimension();
        byte[] raw = vec.getData();
        CompositeSequence element = vec.getDesignElement();

        Integer row = this.rowElementMap.get(element);
        assert row != null;
        elementsByRow.put(row, element);

        boolean[] values = this.getVals(converter, vec, raw);
        Collection<BioAssay> assays = dim.getBioAssays();
        if (values.length != assays.size()) {
            throw new IllegalStateException("Expected " + values.length + " bioassays at design element " + element + ", got " + assays.size());
        }

        this.setMatBioAssayValues(matrix, row, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    for (int row = 0; row < matrix.rows(); row++) {
        matrix.addRowName(elementsByRow.get(row));
    }

    assert matrix.getRowNames().size() == matrix.rows();
    return matrix;
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class PersistentDummyObjectHelper method getDoubleData.
/**
 * Generates a vector of random doubles, one per {@code NUM_BIOMATERIALS}, and serializes it to bytes.
 * <p>
 * Each value is a random number in [0, 1) plus an offset that grows by 0.5 after every third index (0, 3, 6, ...).
 *
 * @return the generated values converted with {@code ByteArrayConverter.doubleArrayToBytes}
 */
private byte[] getDoubleData() {
    double[] data = new double[PersistentDummyObjectHelper.NUM_BIOMATERIALS];
    // One generator for the whole vector rather than constructing a new one for every element.
    Random random = new Random();
    double bump = 0.0;
    for (int j = 0; j < data.length; j++) {
        data[j] = random.nextDouble() + bump;
        if (j % 3 == 0) {
            // add some correlation structure to the data.
            bump += 0.5;
        }
    }
    ByteArrayConverter bconverter = new ByteArrayConverter();
    return bconverter.doubleArrayToBytes(data);
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class PrincipalComponentAnalysis method getEigenvectorArrays.
/**
 * Convenience method to access the eigenvectors as a list of {@code Double[]}.
 * <p>
 * Eigenvectors are placed at position {@code componentNumber - 1}; those whose component number exceeds the number
 * of components stored are skipped, and positions that received no eigenvector are removed from the result.
 *
 * @return the stored eigenvectors, ordered by component number
 * @throws IllegalArgumentException if the number of components stored exceeds the number of bioassays in the
 *                                  bioassay dimension
 */
@Transient
public List<Double[]> getEigenvectorArrays() throws IllegalArgumentException {
    ByteArrayConverter bac = new ByteArrayConverter();
    List<Double[]> result = new ArrayList<>(this.getNumComponentsStored());
    Collection<BioAssay> bioAssays = this.getBioAssayDimension().getBioAssays();
    if (bioAssays.size() < this.getNumComponentsStored()) {
        /*
         * This is a sanity check. The number of components stored is fixed at some lower value
         */
        throw new IllegalArgumentException("EE id = " + this.getExperimentAnalyzed().getId() + ", PCA: Number of components stored (" + this.getNumComponentsStored() + ") is greater than the number of bioAssays (" + bioAssays.size() + ")");
    }
    // Pre-fill with placeholders so eigenvectors can be stored by index regardless of iteration order.
    for (int i = 0; i < bioAssays.size(); i++) {
        result.add(null);
    }
    for (Eigenvector ev : this.getEigenVectors()) {
        // Component numbers are 1-based.
        int index = ev.getComponentNumber() - 1;
        if (index >= this.getNumComponentsStored()) {
            continue;
        }
        double[] doubleArr = bac.byteArrayToDoubles(ev.getVector());
        Double[] dA = ArrayUtils.toObject(doubleArr);
        result.set(index, dA);
    }
    // Drop the placeholders that were never replaced.
    CollectionUtils.filter(result, new Predicate() {
        @Override
        public boolean evaluate(Object object) {
            return object != null;
        }
    });
    return result;
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class PrincipalComponentAnalysisServiceImpl method create.
/**
 * Assembles a {@code PrincipalComponentAnalysis} from the decomposition results and persists it through the DAO.
 *
 * @param ee                   the experiment that was analyzed
 * @param u                    matrix whose rows are probes; column {@code i} supplies the loadings for component
 *                             {@code i + 1}
 * @param eigenvalues          the eigenvalues; all are stored, each with its fraction of their sum
 * @param v                    matrix whose columns are stored as eigenvectors; all columns are stored
 * @param bad                  the bioassay dimension to attach to the analysis
 * @param numComponentsToStore requested number of components; capped at {@code v.columns()}
 * @param numLoadingsToStore   recorded as the maximum number of probes per component and used to bound the loadings
 *                             kept per component
 * @return the result of {@code principalComponentAnalysisDao.create}
 */
@Override
@Transactional
public PrincipalComponentAnalysis create(ExpressionExperiment ee, DoubleMatrix<CompositeSequence, Integer> u, double[] eigenvalues, DoubleMatrix<Integer, BioMaterial> v, BioAssayDimension bad, int numComponentsToStore, int numLoadingsToStore) {
    PrincipalComponentAnalysis analysis = PrincipalComponentAnalysis.Factory.newInstance();
    int storedComponents = Math.min(numComponentsToStore, v.columns());
    analysis.setNumComponentsStored(storedComponents);
    analysis.setBioAssayDimension(bad);
    analysis.setMaxNumProbesPerComponent(numLoadingsToStore);
    analysis.setExperimentAnalyzed(ee);

    /*
     * U: for each stored component, keep the top-ranked probes as ordered by sortByColumnAbsoluteValues.
     */
    for (int component = 0; component < storedComponents; component++) {
        List<CompositeSequence> ranked = u.sortByColumnAbsoluteValues(component, true);
        // NOTE(review): the "- 1" means one fewer than min(rows, numLoadingsToStore) loadings are kept per
        // component - confirm whether that is intended.
        int limit = Math.min(u.rows(), numLoadingsToStore) - 1;
        for (int rank = 0; rank < limit; rank++) {
            CompositeSequence probe = ranked.get(rank);
            double loading = u.getRowByName(probe)[component];
            analysis.getProbeLoadings().add(ProbeLoading.Factory.newInstance(component + 1, loading, rank, probe));
        }
    }

    /*
     * V: every column is stored as an eigenvector, numbered from 1.
     */
    ByteArrayConverter converter = new ByteArrayConverter();
    for (int col = 0; col < v.columns(); col++) {
        byte[] serialized = converter.doubleArrayToBytes(v.getColumn(col));
        int componentNumber = col + 1;
        log.debug(componentNumber);
        analysis.getEigenVectors().add(Eigenvector.Factory.newInstance(componentNumber, serialized));
    }

    /*
     * Eigenvalues: all are stored. The total is needed first so each one can record its share of it.
     */
    double total = 0.0;
    for (double value : eigenvalues) {
        total += value;
    }
    for (int k = 0; k < eigenvalues.length; k++) {
        Eigenvalue eigenvalue = Eigenvalue.Factory.newInstance();
        eigenvalue.setComponentNumber(k + 1);
        eigenvalue.setValue(eigenvalues[k]);
        eigenvalue.setVarianceFraction(eigenvalue.getValue() / total);
        analysis.getEigenValues().add(eigenvalue);
    }

    return this.principalComponentAnalysisDao.create(analysis);
}
Aggregations