Use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
The class RepartitionHadoopSequenceFile, method main.
/**
 * Repartitions an MMTF-Hadoop Sequence file.
 *
 * @param args
 *            args[0] path to input Hadoop Sequence file, args[1] path to
 *            output Hadoop Sequence file, args[2] number of partitions
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Usage: RepartitionHadoopSequenceFile <input-path> <output-path> <number-of-partitions>");
        System.exit(1);
    }
    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(RepartitionHadoopSequenceFile.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    long start = System.nanoTime();
    String inputPath = args[0];
    String outputPath = args[1];
    int numPartitions = Integer.parseInt(args[2]);
    // read the input file, redistribute it across the requested number
    // of partitions, and write it back out
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(inputPath, sc);
    pdb = pdb.repartition(numPartitions);
    MmtfWriter.writeSequenceFile(outputPath, sc, pdb);
    long end = System.nanoTime();
    System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
    sc.close();
}
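A quick way to verify the result is to read the output back and ask Spark how many partitions it has. The sketch below is not part of mmtf-spark: the class name CheckPartitions is hypothetical, and the import of MmtfReader from edu.sdsc.mmtf.spark.io is an assumption about the package layout, since the examples here do not show their imports.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;
import edu.sdsc.mmtf.spark.io.MmtfReader; // assumed package location

public class CheckPartitions {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("CheckPartitions");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // read the repartitioned Hadoop Sequence file (args[0] = its path)
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(args[0], sc);
        // each partition is written as one part file, so this typically
        // matches the number of partitions requested above
        System.out.println("# partitions: " + pdb.getNumPartitions());
        sc.close();
    }
}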
Use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
The class MmcifToMmtfFull, method main.
/**
 * Converts a directory containing .cif files into an MMTF-Hadoop Sequence file.
 * The input directory is traversed recursively to find .cif files.
 *
 * @param args args[0] <input-path-to-cif_files>, args[1] <output-path-to-mmtf-hadoop-file>
 *
 * @throws FileNotFoundException
 */
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: MmcifToMmtfFull <input-path-to-cif_files> <output-path-to-mmtf-hadoop-file>");
        System.exit(1);
    }
    // path to input directory
    String cifPath = args[0];
    // path to output directory
    String mmtfPath = args[1];
    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("MmcifToMmtfFull");
    JavaSparkContext sc = new JavaSparkContext(conf);
    // read .cif files recursively, starting from the specified top-level directory
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importMmcifFiles(cifPath, sc);
    // save as an MMTF-Hadoop Sequence file
    MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);
    System.out.println(structures.count() + " structures written to: " + mmtfPath);
    // close Spark
    sc.close();
}
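A design note on the two final steps above: writeSequenceFile and count() are both Spark actions, so without caching the .cif import is executed twice. A minimal variation, assuming the same variables as the snippet above and using only the standard Spark cache() call:

// cache the imported structures so the .cif files are parsed only once
JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importMmcifFiles(cifPath, sc).cache();
// first action: write the Hadoop Sequence file
MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);
// second action: count() now reuses the cached data instead of re-running the import
System.out.println(structures.count() + " structures written to: " + mmtfPath);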
Use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
The class MmtfBenchmark, method main.
public static void main(String[] args) throws FileNotFoundException {
    long start = System.nanoTime();
    if (args.length != 1) {
        System.out.println("Usage: MmtfBenchmark <mmtf-hadoop-sequence-file>");
        System.exit(1);
    }
    // instantiate Spark. Each Spark application needs these two lines of code.
    // Note: no master is set here, so it must be supplied externally, e.g., by spark-submit.
    SparkConf conf = new SparkConf().setAppName(MmtfBenchmark.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // read all PDB entries from a local Hadoop Sequence file
    String path = args[0];
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc);
    System.out.println("# structures: " + pdb.count());
    // close Spark
    sc.close();
    long end = System.nanoTime();
    System.out.println((end - start) / 1E9 + " sec.");
}
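Because no master is set in the SparkConf above, this benchmark is intended to be launched with spark-submit, which supplies the master URL. A hypothetical invocation, in which the jar name and the fully qualified class name are placeholders, not taken from the project:

spark-submit --master local[*] --class <package>.MmtfBenchmark mmtf-spark.jar <mmtf-hadoop-sequence-file>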
Use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
The class PdbToMmtfFull, method main.
/**
* Converts a directory containing PDB files into an MMTF-Hadoop Sequence file.
* The input directory is traversed recursively to find PDB files.
*
* @param args args[0] <input-path-to-pdb_files>, args[1] <output-path-to-mmtf-hadoop-file>
*
* @throws FileNotFoundException
*/
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: PdbToMmtfFull <input-path-to-pdb_files> <output-path-to-mmtf-hadoop-file>");
        System.exit(1);
    }
    // path to input directory
    String pdbPath = args[0];
    // path to output directory
    String mmtfPath = args[1];
    // instantiate Spark
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("PdbToMmtfFull");
    JavaSparkContext sc = new JavaSparkContext(conf);
    // read PDB files recursively, starting from the specified top-level directory
    JavaPairRDD<String, StructureDataInterface> structures = MmtfImporter.importPdbFiles(pdbPath, sc);
    // save as an MMTF-Hadoop Sequence file
    MmtfWriter.writeSequenceFile(mmtfPath, sc, structures);
    System.out.println(structures.count() + " structures written to: " + mmtfPath);
    // close Spark
    sc.close();
}
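To spot-check the conversion before closing Spark, each value in the pair RDD can be queried through the StructureDataInterface getters (getNumModels, getNumChains, and getNumAtoms are part of org.rcsb.mmtf.api.StructureDataInterface). A sketch, with an arbitrary 1% sample to keep the output small:

// print basic counts for a small random sample of the converted structures
structures.sample(false, 0.01).foreach(t -> System.out.println(
        t._1 + ": " + t._2.getNumModels() + " models, "
             + t._2.getNumChains() + " chains, "
             + t._2.getNumAtoms() + " atoms"));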
Use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
The class ReadLocalMmtfHadoopFile, method main.
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.println("Usage: " + ReadLocalMmtfHadoopFile.class.getSimpleName() + " <inputFilePath>");
        System.exit(1);
    }
    // instantiate Spark. Each Spark application needs these two lines of code.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ReadLocalMmtfHadoopFile.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // read a local MMTF-Hadoop Sequence file
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(args[0], sc);
    System.out.println("# structures: " + pdb.count());
    // print structural details for a 1% random sample of the entries
    pdb = pdb.sample(false, 0.01);
    pdb.foreach(t -> TraverseStructureHierarchy.printStructureData(t._2));
    // close Spark
    sc.close();
}
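To look at one specific entry instead of a random sample, the pair RDD can be filtered on its key, the PDB ID, before the sampling step above. A sketch, using "4HHB" as an arbitrary example ID:

// keep only the entry whose key matches the requested PDB ID, then print it
pdb.filter(t -> t._1.equals("4HHB"))
   .foreach(t -> TraverseStructureHierarchy.printStructureData(t._2));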