Example usage of org.rcsb.mmtf.api.StructureDataInterface in the project mmtf-spark by sbl-sdsc: the main method of the class MapToBioAssembly.
/**
 * Downloads the full MMTF record for PDB entry 1HV4, expands it into its
 * bioassemblies, and prints how many bioassemblies were generated.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Bug fix: the app name previously referenced CustomReportDemo (copy-paste
    // error); it now correctly identifies this class in the Spark UI/logs.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MapToBioAssembly.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // 1HV4 is a structure known to have multiple bioassemblies.
    List<String> pdbIds = Arrays.asList("1HV4");

    // Each structure may expand into several (id, bioassembly) pairs.
    JavaPairRDD<String, StructureDataInterface> bioassemblies =
            MmtfReader.downloadFullMmtfFiles(pdbIds, sc).flatMapToPair(new StructureToBioassembly());

    System.out.println("Number of bioassemblies for 1HV4: " + bioassemblies.count());

    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in the project mmtf-spark by sbl-sdsc: the main method of the class MapToBioAssembly2.
/**
 * Reads the full-PDB MMTF sequence file, filters it to a non-redundant subset
 * (Pisces: 20% sequence identity, 3.0 A resolution cutoff), expands each
 * structure into its bioassemblies, and reports the count and elapsed time.
 *
 * @param args unused
 * @throws FileNotFoundException if the MMTF sequence file path cannot be resolved
 * @throws IOException if reading the sequence file fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    // Bug fix: the app name previously referenced CustomReportDemo (copy-paste
    // error); it now correctly identifies this class in the Spark UI/logs.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MapToBioAssembly2.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    long start = System.nanoTime();

    // Read the whole PDB and keep a representative, non-redundant subset.
    JavaPairRDD<String, StructureDataInterface> pdb =
            MmtfReader.readFullSequenceFile(sc).filter(new Pisces(20, 3.0));

    // Each asymmetric unit may expand into several (id, bioassembly) pairs.
    JavaPairRDD<String, StructureDataInterface> bioassemblies =
            pdb.flatMapToPair(new StructureToBioassembly2());

    System.out.println("Number of bioassemblies: " + bioassemblies.count());

    long end = System.nanoTime();
    System.out.println("time: " + (end - start) / 1E9 + " sec.");

    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in the project mmtf-spark by sbl-sdsc: the main method of the class RosettaToMmtfFull.
/**
* Converts a directory containing Rosetta-style PDB files into an MMTF-Hadoop Sequence file.
* The input directory is traversed recursively to find PDB files.
*
* <p> Example files from Gremlin website:
* https://gremlin2.bakerlab.org/meta/aah4043_final.zip
*
* @param args args[0] <path-to-pdb_files>, args[1] <path-to-mmtf-hadoop-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: RosettaToMmtfFull <path-to-pdb_files> <path-to-mmtf-hadoop-file>");
        // Bug fix: previously execution fell through to args[0] after printing
        // the usage message, throwing ArrayIndexOutOfBoundsException.
        System.exit(1);
    }

    // path to input directory
    String pdbPath = args[0];
    // path to output directory
    String mmtfPath = args[1];

    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("RosettaToMmtfFull");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read PDB files recursively starting at the specified directory
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importPdbFiles(pdbPath, sc);

    // save as an MMTF-Hadoop Sequence File
    MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);

    // close Spark
    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in the project mmtf-spark by sbl-sdsc: the main method of the class TraverseStructureHierarchy.
/**
 * Downloads the full MMTF record for PDB entry 1HV4 (a structure with two
 * bioassemblies) and prints its complete structure hierarchy.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // instantiate Spark. Each Spark application needs these two lines of code.
    // Bug fix: the app name previously referenced ReadMmtfReduced (copy-paste
    // error); it now correctly identifies this class in the Spark UI/logs.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(TraverseStructureHierarchy.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Other interesting examples:
    //   "5UTV" multiple models, "1BZ1" multiple protein chains,
    //   "1STP" single protein chain, "2NBK" single protein chain.
    // 1HV4 is a structure with 2 bioassemblies.
    List<String> pdbIds = Arrays.asList("1HV4");

    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc).cache();
    pdb.foreach(t -> TraverseStructureHierarchy.printAll(t._2));

    // Bug fix: the context was never closed, leaking the Spark application.
    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in the project mmtf-spark by sbl-sdsc: the main method of the class WriteMmtfCustom.
/**
* @param args
* @throws FileNotFoundException
*/
/**
 * Samples 20% of the PDB, keeps only high-resolution X-ray structures
 * (resolution &lt;= 2.0 A, Rfree &lt;= 0.2), and writes the resulting subset
 * to a custom MMTF-Hadoop Sequence file.
 *
 * @param args unused
 * @throws FileNotFoundException if the MMTF full path cannot be resolved
 */
public static void main(String[] args) throws FileNotFoundException {
    String fullPath = MmtfReader.getMmtfFullPath();

    long startTime = System.nanoTime();

    SparkConf sparkConf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(WriteMmtfCustom.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    // read a 20% random sample of the PDB
    double sampleFraction = 0.2;
    long randomSeed = 123;
    JavaPairRDD<String, StructureDataInterface> subset =
            MmtfReader.readSequenceFile(fullPath, sampleFraction, randomSeed, sc)
                    // retain high resolution X-ray structures
                    .filter(new ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
                    .filter(new Resolution(0, 2.0))
                    .filter(new Rfree(0, 0.2))
                    // coalesce into 8 partitions to avoid creating many small files
                    .coalesce(8);

    // save this subset in a Hadoop Sequence file
    MmtfWriter.writeSequenceFile(fullPath + "_xray", sc, subset);

    System.out.println("# structures in custom set: " + subset.count());

    long endTime = System.nanoTime();
    System.out.println("Time: " + (endTime - startTime) / 1E9 + "sec.");

    sc.close();
}
Aggregations