Use of com.google.cloud.dataflow.sdk.Pipeline in project gatk by broadinstitute: class ReferenceAPISourceUnitTest, method queryReferenceAPI.
private ReferenceBases queryReferenceAPI(final String referenceName, final SimpleInterval interval, int pageSize) {
    final Pipeline p = setupPipeline();
    ReferenceAPISource refAPISource = makeReferenceAPISource(referenceName, p);
    return refAPISource.getReferenceBases(p.getOptions(), interval, pageSize);
}
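Because the helper is parameterized by page size, a caller can check that paging is transparent to the result. A minimal sketch of such a caller, assuming the HS37D5_REF_ID constant and TestNG assertions used elsewhere in this test class (the test name and interval are illustrative, not from the GATK source):
@Test(groups = "cloud")
public void testPagingDoesNotChangeResult() {
    // Querying the same interval with different page sizes should yield identical bases.
    final SimpleInterval interval = new SimpleInterval("1", 50001, 50500);
    final ReferenceBases paged = queryReferenceAPI(HS37D5_REF_ID, interval, 10);
    final ReferenceBases unpaged = queryReferenceAPI(HS37D5_REF_ID, interval, 1000);
    Assert.assertEquals(paged.getBases(), unpaged.getBases());
}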
Use of com.google.cloud.dataflow.sdk.Pipeline in project gatk by broadinstitute: class ReferenceAPISourceUnitTest, method testCreateByAssemblyID.
@Test(groups = "cloud", dataProvider = "assemblyIDData")
public void testCreateByAssemblyID(final String assemblyID, final String refID) throws Exception {
final Pipeline p = setupPipeline();
final ReferenceAPISource apiSourceByAssemblyName = ReferenceAPISource.fromReferenceSetAssemblyID(p.getOptions(), assemblyID);
final ReferenceAPISource apiSourceByID = makeReferenceAPISource(refID, p);
Assert.assertEquals(apiSourceByAssemblyName.getReferenceMap(), apiSourceByID.getReferenceMap());
}
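The test is driven by an assemblyIDData provider that pairs assembly IDs with reference-set IDs. An illustrative sketch of that wiring, with a hypothetical pairing (the real provider in GATK's suite supplies its own ID pairs):
@DataProvider(name = "assemblyIDData")
public Object[][] assemblyIDData() {
    // Hypothetical pairing for illustration only; substitute real assembly/reference IDs.
    return new Object[][]{ { "GRCh37lite", HS37D5_REF_ID } };
}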
Use of com.google.cloud.dataflow.sdk.Pipeline in project gatk by broadinstitute: class ReferenceAPISourceUnitTest, method testDummy.
@Test(groups = "cloud")
public void testDummy() {
    String referenceName = HS37D5_REF_ID;
    final String expected = "AAACAGGTTA";
    // -1 because we're using closed intervals
    SimpleInterval interval = new SimpleInterval("1", 50001, 50001 + expected.length() - 1);
    Logger logger = LogManager.getLogger(ReferenceAPISourceUnitTest.class);
    GenomicsOptions options = PipelineOptionsFactory.create().as(GenomicsOptions.class);
    options.setApiKey(getGCPTestApiKey());
    options.setProject(getGCPTestProject());
    // We don't use GATKTestPipeline because we need specific options.
    final Pipeline p = TestPipeline.create(options);
    ReferenceAPISource refAPISource = makeReferenceAPISource(referenceName, p);
    ReferenceBases bases = refAPISource.getReferenceBases(p.getOptions(), interval);
    final String actual = new String(bases.getBases());
    Assert.assertEquals(actual, expected, "Wrong bases returned");
    p.run();
}
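The interval arithmetic follows from 1-based, closed intervals: a closed interval [start, end] spans end - start + 1 bases, so an interval holding N bases ends at start + N - 1. A quick worked check (illustrative):
final int start = 50001;
final int n = "AAACAGGTTA".length();  // 10 expected bases
final int end = start + n - 1;        // 50010, inclusive
assert end - start + 1 == n;          // a closed interval spans exactly n bases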
Use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera: class AvroPipelineTest, method testGeneric.
@Test
public void testGeneric() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("person.avsc").openStream());
    GenericRecord savedRecord = new GenericData.Record(schema);
    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), schema);
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    PCollection<GenericRecord> input = p.apply(AvroIO.Read.from(inputFile.getAbsolutePath()).withSchema(schema));
    input.apply(AvroIO.Write.to(outputDir.getAbsolutePath()).withSchema(schema));
    EvaluationResult res = SparkPipelineRunner.create().run(p);
    res.close();
    List<GenericRecord> records = readGenericFile();
    assertEquals(Lists.newArrayList(savedRecord), records);
}
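The helpers populateGenericFile and readGenericFile are not shown in this excerpt. A plausible sketch using the standard Avro file APIs (org.apache.avro.file.DataFileWriter/DataFileReader with GenericDatumWriter/GenericDatumReader), assuming inputFile and outputDir are java.io.File fields and that AvroIO.Write produced a single shard with the default "-00000-of-00001" suffix:
private void populateGenericFile(List<GenericRecord> records, Schema schema) throws IOException {
    // Write the records to inputFile as an Avro container file.
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
        writer.create(schema, inputFile);
        for (GenericRecord record : records) {
            writer.append(record);
        }
    }
}

private List<GenericRecord> readGenericFile() throws IOException {
    // Assumes a single output shard; AvroIO.Write's default shard template is "-SSSSS-of-NNNNN".
    final File shard = new File(outputDir.getAbsolutePath() + "-00000-of-00001");
    final List<GenericRecord> records = new ArrayList<>();
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(shard, new GenericDatumReader<GenericRecord>())) {
        for (GenericRecord record : reader) {
            records.add(record);
        }
    }
    return records;
}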
Use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera: class WindowedWordCountTest, method testRun.
@Test
public void testRun() throws Exception {
    SparkPipelineOptions options = SparkPipelineOptionsFactory.create();
    options.setRunner(SparkPipelineRunner.class);
    // Create the pipeline from the Spark options configured above (a fresh
    // PipelineOptionsFactory.create() would silently discard the runner setting).
    Pipeline p = Pipeline.create(options);
    PCollection<String> inputWords = p.apply(Create.timestamped(WORDS, TIMESTAMPS)).setCoder(StringUtf8Coder.of());
    PCollection<String> windowedWords = inputWords.apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));
    PCollection<String> output = windowedWords.apply(new SimpleWordCountTest.CountWords());
    DataflowAssert.that(output).containsInAnyOrder(EXPECTED_COUNT_SET);
    EvaluationResult res = SparkPipelineRunner.create().run(p);
    res.close();
}
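The fixtures WORDS, TIMESTAMPS, and EXPECTED_COUNT_SET are defined elsewhere in the test class. An illustrative set of values, assuming CountWords emits strings of the form "word: count" (as in the companion SimpleWordCountTest) and that timestamps are epoch millis chosen so the elements land in distinct one-minute windows:
// Hypothetical fixtures; the real values in the Cloudera suite may differ.
private static final List<String> WORDS = Arrays.asList("hi there", "hi", "hi sue bob");
private static final List<Long> TIMESTAMPS = Arrays.asList(60000L, 60000L, 120000L);
// First window holds {hi x2, there}; second window holds {hi, sue, bob}.
private static final Set<String> EXPECTED_COUNT_SET =
    ImmutableSet.of("hi: 2", "there: 1", "hi: 1", "sue: 1", "bob: 1");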