Use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
The class DownloadSwissModelFiles, method main.
/**
 * Downloads SWISS-MODEL homology models for a list of UniProt IDs
 * and prints the structure data for each downloaded model.
 *
 * <p>Note: this example takes no command-line arguments; the
 * UniProt IDs are hard-coded below.
 *
 * @throws FileNotFoundException
 */
public static void main(String[] args) throws FileNotFoundException {
    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("DownloadSwissModelFiles");
    JavaSparkContext sc = new JavaSparkContext(conf);

    List<String> uniProtIds = Arrays.asList("P22629", "Q9H2C2", "Q8WXK3");
    // List<String> uniProtIds = Arrays.asList("P07900");

    // download SWISS-MODEL structures for the specified UniProt IDs
    // TODO: Empty structure record for Q8WXK3
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.downloadSwissModelsByUniProtIds(uniProtIds, sc);
    structures.foreach(t -> TraverseStructureHierarchy.printStructureData(t._2));

    // save as an MMTF-Hadoop Sequence File
    // MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);

    // close Spark
    sc.close();
}
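To persist the downloaded models, the commented-out writer call can be enabled once an output path is defined. A minimal sketch, assuming a hypothetical output path (MmtfWriter.writeSequenceFile is the same call used by the converters below):

// hypothetical output path; adjust to your environment
String mmtfPath = "/tmp/swiss-model-structures";
// save the downloaded models as an MMTF-Hadoop Sequence File
MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);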
Use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
The class PdbToMmtfFull, method main.
/**
* Converts a directory containing PDB files into an MMTF-Hadoop Sequence file.
* The input directory is traversed recursively to find PDB files.
*
* <p> Example files from Gremlin website:
* https://gremlin2.bakerlab.org/meta/aah4043_final.zip
*
* @param args args[0] <path-to-pdb_files>, args[1] <path-to-mmtf-hadoop-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: PdbToMmtfFull <path-to-pdb_files> <path-to-mmtf-hadoop-file>");
        System.exit(1);
    }

    // path to input directory
    String pdbPath = args[0];
    // path to output directory
    String mmtfPath = args[1];

    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("PdbToMmtfFull");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read PDB files recursively, starting at the specified directory
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importPdbFiles(pdbPath, sc);
    structures.foreach(t -> TraverseStructureHierarchy.printStructureData(t._2));

    // save as an MMTF-Hadoop Sequence File
    MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);

    // close Spark
    sc.close();
}
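After the conversion, the sequence file can be read back to verify the contents. A quick sanity check one might append before sc.close(); MmtfReader.readSequenceFile is the same call used in TestRosettaMmtf below:

// read the sequence file back and report the number of structures written
JavaPairRDD<String, StructureDataInterface> imported = MmtfReader.readSequenceFile(mmtfPath, sc);
System.out.println("Structures in sequence file: " + imported.count());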
Use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
The class RosettaToMmtfFull, method main.
/**
* Converts a directory containing Rosetta-style PDB files into an MMTF-Hadoop Sequence file.
* The input directory is traversed recursively to find PDB files.
*
* <p> Example files from Gremlin website:
* https://gremlin2.bakerlab.org/meta/aah4043_final.zip
*
* @param args args[0] <path-to-pdb_files>, args[1] <path-to-mmtf-hadoop-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: RosettaToMmtfFull <path-to-pdb_files> <path-to-mmtf-hadoop-file>");
        System.exit(1);
    }

    // path to input directory
    String pdbPath = args[0];
    // path to output directory
    String mmtfPath = args[1];

    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("RosettaToMmtfFull");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read PDB files recursively, starting at the specified directory
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importPdbFiles(pdbPath, sc);

    // save as an MMTF-Hadoop Sequence File
    MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);

    // close Spark
    sc.close();
}
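The imported RDD can also be filtered before writing. A sketch, assuming the mmtf-spark filter classes referenced in the commented-out lines of TestRosettaMmtf below (e.g. ContainsLProteinChain) are on the classpath:

// keep only structures containing at least one L-protein chain
structures = structures.filter(new ContainsLProteinChain());
System.out.println("Protein structures: " + structures.count());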
Use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
The class SwissModelDatasetToStructure, method main.
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(SwissModelDatasetToStructure.class.getSimpleName()).getOrCreate();
    JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

    // retrieve SWISS-MODEL metadata for a list of UniProt IDs
    List<String> uniProtIds = Arrays.asList("P36575", "P24539", "O00244", "P18846", "Q9UII2");
    Dataset<Row> ds = SwissModelDataset.getSwissModels(uniProtIds);
    ds.show();

    // keep only high-quality models with sufficient coverage
    ds = ds.filter("qmean > -2.5 AND coverage > 0.5");

    // collect the coordinate download URLs and fetch the models
    List<String> urls = ds.select("coordinates").as(Encoders.STRING()).collectAsList();
    System.out.println(urls);
    JavaPairRDD<String, StructureDataInterface> models = MmtfImporter.downloadSwissModelsByUrls(urls, sc);

    // print the polymer sequence of the first entity in each model
    models.foreach(t -> System.out.println(t._2.getEntitySequence(0)));

    spark.close();
}
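The SQL-string filter can equivalently be written with Spark's Column API, which keeps the predicate as typed expressions; a sketch using the standard org.apache.spark.sql.functions.col helper:

// equivalent filter using the Column API instead of a SQL string
// (requires: import static org.apache.spark.sql.functions.col)
ds = ds.filter(col("qmean").gt(-2.5).and(col("coverage").gt(0.5)));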
Use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
The class TestRosettaMmtf, method main.
/**
* Test: Read MMTF-Hadoop Sequence file.
*
* @param args args[0] <path-to-mmtf-hadoop-sequence-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    // instantiate Spark
    // TODO set to local[1] !!!!
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("TestRosettaMmtf");
    JavaSparkContext sc = new JavaSparkContext(conf);

    long start = System.nanoTime();

    // read the MMTF-Hadoop Sequence file
    JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);

    // total: 639 structures
    // structures = structures.filter(new ContainsDnaChain()); // ?
    // structures = structures.filter(new ContainsLProteinChain()); // 639?
    // structures = structures.filter(new ContainsGroup("ZN")); // 0
    // structures = structures.filter(new ContainsGroup("ATP")); //

    // debug: print structure data
    // structures.foreach(t -> TraverseStructureHierarchy.demo(t._2));
    // structures.foreach(t -> System.out.println(t._1));

    // print the total number of entities and the structure count
    System.out.println(structures.map(t -> t._2.getNumEntities()).reduce((a, b) -> a + b));
    System.out.println("Number of structures read: " + structures.count());

    long end = System.nanoTime();
    System.out.println("Time: " + (end - start) / 1E9 + " sec.");

    // close Spark
    sc.close();
}
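Because the RDD is traversed by two actions (the entity-count reduction and count()), the sequence file is read twice. Caching after the read avoids the second pass; a small optimization sketch using the standard Spark cache() call:

// cache the RDD so the sequence file is read from disk only once
JavaPairRDD<String, StructureDataInterface> structures =
        MmtfReader.readSequenceFile(args[0], sc).cache();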