use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by caskdata.
the class PartitionedFileSetDefinition method getDataset.
/**
 * Builds the runtime {@link PartitionedFileSet} for a dataset specification.
 *
 * <p>The partitioning is read from the specification's properties, the runtime arguments are
 * passed through {@code updateArgumentsIfNeeded}, and the two embedded datasets (the file set and
 * the partition table) are instantiated with the resulting arguments before being wrapped in a
 * {@link PartitionedFileSetDataset}.
 *
 * @param datasetContext context handed to the embedded dataset definitions and to the result
 * @param spec specification of this dataset; supplies the properties, the name and the embedded
 *     specifications
 * @param arguments runtime arguments as supplied by the caller
 * @param classLoader class loader handed to the embedded dataset definitions
 * @return a new {@link PartitionedFileSetDataset}
 * @throws IOException declared by this method; presumably raised while instantiating an embedded
 *     dataset (not visible from here)
 */
@Override
public PartitionedFileSet getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                     Map<String, String> arguments, ClassLoader classLoader)
  throws IOException {
  // The partitioning is expected to be present in the dataset properties.
  final Partitioning partitioning = PartitionedFileSetProperties.getPartitioning(spec.getProperties());

  // Work with the (possibly adjusted) arguments from here on.
  final Map<String, String> effectiveArguments = updateArgumentsIfNeeded(arguments, partitioning);

  // Embedded file set.
  final DatasetSpecification filesetSpec = spec.getSpecification(FILESET_NAME);
  final FileSet fileset = filesetDef.getDataset(datasetContext, filesetSpec, effectiveArguments, classLoader);

  // Embedded partition table.
  final DatasetSpecification partitionTableSpec = spec.getSpecification(PARTITION_TABLE_NAME);
  final IndexedTable table =
    indexedTableDef.getDataset(datasetContext, partitionTableSpec, effectiveArguments, classLoader);

  return new PartitionedFileSetDataset(datasetContext, spec.getName(), partitioning, fileset, table,
                                       spec, effectiveArguments, getExploreProvider());
}
use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testDefaultBasePath.
/**
 * Verifies the default base location of a partitioned file set that is created without an
 * explicit base path: it is named after the dataset, exists as a directory, shares its parent
 * directory with the default base location of a plain file set, and no longer exists once the
 * dataset instances have been deleted.
 */
@Test
public void testDefaultBasePath() throws Exception {
  // Only the partitioning is configured; no base path is given.
  DatasetId id = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPath");
  dsFrameworkUtil.createInstance("partitionedFileSet", id,
                                 PartitionedFileSetProperties.builder().setPartitioning(PARTITIONING_1).build());
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(id);
  Location baseLocation = pfs.getEmbeddedFileSet().getBaseLocation();

  // JUnit's contract is assertEquals(expected, actual): the dataset name is the expected value.
  Assert.assertEquals(id.getDataset(), baseLocation.getName());
  Assert.assertTrue(baseLocation.exists());
  Assert.assertTrue(baseLocation.isDirectory());

  // A plain file set created with default properties serves as the reference for the parent directory.
  DatasetId fid = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPathFileSet");
  dsFrameworkUtil.createInstance("fileSet", fid, FileSetProperties.builder().build());
  FileSet fs = dsFrameworkUtil.getInstance(fid);
  Location fsBaseLocation = fs.getBaseLocation();
  Assert.assertEquals(Locations.getParent(baseLocation), Locations.getParent(fsBaseLocation));

  dsFrameworkUtil.deleteInstance(fid);
  dsFrameworkUtil.deleteInstance(id);

  // After both instances are deleted, the partitioned file set's base location must be gone.
  Assert.assertFalse(baseLocation.exists());
}
use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by caskdata.
the class FileSetTest method testRollbackOfNonDirectoryOutput.
/**
 * Verifies that calling {@code onFailure()} on a file set leaves its output location untouched
 * when that location is a regular (non-directory) file.
 */
@Test
public void testRollbackOfNonDirectoryOutput()
  throws IOException, TransactionFailureException, DatasetManagementException, UnauthorizedException {
  FileSet dataset = createFileset(testFileSetInstance1);
  Location output = dataset.getOutputLocation();

  // The output location must not exist before anything is written.
  Assert.assertFalse(output.exists());

  // Opening and immediately closing the stream creates the output location as a plain file.
  outputFile(output);
  Assert.assertTrue(output.exists());

  // Trigger the failure handling of the dataset.
  FileSetDataset fileSetDataset = (FileSetDataset) dataset;
  fileSetDataset.onFailure();

  // A non-directory output file is expected to survive the failure handling.
  Assert.assertTrue(output.exists());
}

/** Creates the given location as an empty file by opening and closing its output stream. */
private static void outputFile(Location location) throws IOException {
  location.getOutputStream().close();
}
use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by caskdata.
the class FileSetTest method testInputOutputFormatClassAtRuntime.
/**
 * Verifies that the input and output format class names of a file set come from the dataset
 * properties by default, and that either or both can be overridden through runtime arguments.
 */
@Test
public void testInputOutputFormatClassAtRuntime() throws Exception {
  final String textInput = TextInputFormat.class.getName();
  final String textOutput = TextOutputFormat.class.getName();
  final String combineTextInput = CombineTextInputFormat.class.getName();
  final String nullOutput = NullOutputFormat.class.getName();

  // Dataset properties declare text input and text output formats.
  DatasetId datasetId = OTHER_NAMESPACE.dataset("testRuntimeFormats");
  dsFrameworkUtil.createInstance("fileSet", datasetId,
                                 FileSetProperties.builder()
                                   .setInputFormat(TextInputFormat.class)
                                   .setOutputFormat(TextOutputFormat.class)
                                   .build());

  // No runtime arguments: both formats are taken from the dataset properties.
  FileSet fs = dsFrameworkUtil.getInstance(datasetId);
  Assert.assertEquals(textInput, fs.getInputFormatClassName());
  Assert.assertEquals(textOutput, fs.getOutputFormatClassName());

  // Runtime arguments override only the input format; the output format is unchanged.
  Map<String, String> inputOverride = ImmutableMap.of(FileSetProperties.INPUT_FORMAT, combineTextInput);
  fs = dsFrameworkUtil.getInstance(datasetId, inputOverride);
  Assert.assertEquals(combineTextInput, fs.getInputFormatClassName());
  Assert.assertEquals(textOutput, fs.getOutputFormatClassName());

  // Runtime arguments override both the input and the output format.
  Map<String, String> bothOverride = ImmutableMap.of(FileSetProperties.INPUT_FORMAT, combineTextInput,
                                                     FileSetProperties.OUTPUT_FORMAT, nullOutput);
  fs = dsFrameworkUtil.getInstance(datasetId, bothOverride);
  Assert.assertEquals(combineTextInput, fs.getInputFormatClassName());
  Assert.assertEquals(nullOutput, fs.getOutputFormatClassName());
}
use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by caskdata.
the class FileSetTest method testWriteRead.
/**
 * Verifies that two file sets created by {@code createFileset} for the two test instances resolve
 * to distinct input and output paths under their respective namespace directories, and that a
 * byte written to each output location can be read back from the corresponding input location.
 */
@Test
public void testWriteRead() throws IOException, DatasetManagementException, UnauthorizedException {
  FileSet fileSet1 = createFileset(testFileSetInstance1);
  FileSet fileSet2 = createFileset(testFileSetInstance2);
  Location fileSet1Output = fileSet1.getOutputLocation();
  Location fileSet2Output = fileSet2.getOutputLocation();

  // The namespace directory is taken to be three levels above the output location.
  Location fileSet1NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet1Output)));
  Location fileSet2NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet2Output)));
  Assert.assertNotNull(fileSet1NsDir);
  Assert.assertNotNull(fileSet2NsDir);

  // JUnit's contract is assertEquals(expected, actual): the namespace id is the expected value.
  Assert.assertEquals(DatasetFrameworkTestUtil.NAMESPACE_ID.getNamespace(), fileSet1NsDir.getName());
  Assert.assertEquals(OTHER_NAMESPACE.getNamespace(), fileSet2NsDir.getName());

  // The two file sets must not share input or output paths.
  Assert.assertNotEquals(fileSet1.getInputLocations().get(0).toURI().getPath(),
                         fileSet2.getInputLocations().get(0).toURI().getPath());
  Assert.assertNotEquals(fileSet1Output.toURI().getPath(), fileSet2Output.toURI().getPath());

  // Write a distinct byte to each file set's output location ...
  try (OutputStream out = fileSet1.getOutputLocation().getOutputStream()) {
    out.write(42);
  }
  try (OutputStream out = fileSet2.getOutputLocation().getOutputStream()) {
    out.write(54);
  }

  // ... and read it back through the first input location of the same file set.
  try (InputStream in = fileSet1.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(42, in.read());
  }
  try (InputStream in = fileSet2.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(54, in.read());
  }
}
Aggregations