Usage examples of org.talend.components.simplefileio.runtime.ugi.UgiDoAs in the Talend "components" project.
From class SimpleFileIODatasetRuntime, method getReadWriteUgiDoAs:
/**
 * Resolves the {@link UgiDoAs} that a runtime should use when executing actions
 * against the dataset, wrapped so that access failures carry useful context.
 *
 * @param datasetProperties dataset properties, containing credentials for the cluster.
 * @param accessType the type of access to the dataset that the user will be performing.
 * @return An object that can be used to execute actions with the correct credentials.
 */
public static UgiDoAs getReadWriteUgiDoAs(SimpleFileIODatasetProperties datasetProperties, UgiExceptionHandler.AccessType accessType) {
    String path = datasetProperties.path.getValue();
    SimpleFileIODatastoreProperties datastore = datasetProperties.getDatastoreProperties();
    // Credential precedence: Kerberos first, then a simple user name, then the current user.
    if (datastore.useKerberos.getValue()) {
        String principal = datastore.kerberosPrincipal.getValue();
        UgiDoAs kerberosDoAs = UgiDoAs.ofKerberos(principal, datastore.kerberosKeytab.getValue());
        return new UgiExceptionHandler(kerberosDoAs, accessType, principal, path);
    }
    String user = datastore.userName.getValue();
    if (user != null && !user.isEmpty()) {
        return new UgiExceptionHandler(UgiDoAs.ofSimple(user), accessType, user, path);
    }
    return new UgiExceptionHandler(UgiDoAs.ofNone(), accessType, null, path);
}
Usage examples of org.talend.components.simplefileio.runtime.ugi.UgiDoAs in the Talend "components" project.
From class SimpleFileIOInputRuntime, method expand:
@Override
public PCollection<IndexedRecord> expand(PBegin in) {
    // Controls the access security on the cluster.
    UgiDoAs doAs = SimpleFileIODatasetRuntime.getReadWriteUgiDoAs(properties.getDatasetProperties(), UgiExceptionHandler.AccessType.Read);
    String path = properties.getDatasetProperties().path.getValue();
    int limit = properties.limit.getValue();
    // Neither overwrite nor mergeOutput applies when reading.
    final boolean overwrite = false;
    final boolean mergeOutput = false;
    // Select the record format implementation matching the configured file format.
    final SimpleRecordFormat recordFormat;
    switch (properties.getDatasetProperties().format.getValue()) {
    case AVRO:
        recordFormat = new SimpleRecordFormatAvroIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    case CSV:
        recordFormat = new SimpleRecordFormatCsvIO(doAs, path, overwrite, limit,
                properties.getDatasetProperties().getRecordDelimiter(),
                properties.getDatasetProperties().getFieldDelimiter(), mergeOutput);
        break;
    case PARQUET:
        recordFormat = new SimpleRecordFormatParquetIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    default:
        throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().format.getValue());
    }
    return recordFormat.read(in);
}
Usage examples of org.talend.components.simplefileio.runtime.ugi.UgiDoAs in the Talend "components" project.
From class S3InputRuntime, method expand:
@Override
public PCollection<IndexedRecord> expand(PBegin in) {
    // The UGI does not control security for S3.
    UgiDoAs doAs = UgiDoAs.ofNone();
    String path = S3Connection.getUriPath(properties.getDatasetProperties());
    int limit = properties.limit.getValue();
    // Neither overwrite nor mergeOutput applies when reading.
    final boolean overwrite = false;
    final boolean mergeOutput = false;
    // Select the record format implementation matching the configured file format.
    final SimpleRecordFormatBase recordFormat;
    switch (properties.getDatasetProperties().format.getValue()) {
    case AVRO:
        recordFormat = new SimpleRecordFormatAvroIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    case CSV:
        recordFormat = new SimpleRecordFormatCsvIO(doAs, path, overwrite, limit,
                properties.getDatasetProperties().getRecordDelimiter(),
                properties.getDatasetProperties().getFieldDelimiter(), mergeOutput);
        break;
    case PARQUET:
        recordFormat = new SimpleRecordFormatParquetIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    default:
        throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().format.getValue());
    }
    // Push the S3 credentials/endpoint settings into the Hadoop configuration used by the format.
    S3Connection.setS3Configuration(recordFormat.getExtraHadoopConfiguration(), properties.getDatasetProperties());
    return recordFormat.read(in);
}
Usage examples of org.talend.components.simplefileio.runtime.ugi.UgiDoAs in the Talend "components" project.
From class S3OutputRuntime, method expand:
@Override
public PDone expand(PCollection<IndexedRecord> in) {
    // The UGI does not control security for S3.
    UgiDoAs doAs = UgiDoAs.ofNone();
    String path = S3Connection.getUriPath(properties.getDatasetProperties());
    boolean overwrite = properties.overwrite.getValue();
    boolean mergeOutput = properties.mergeOutput.getValue();
    // A row limit is meaningless when writing.
    final int limit = -1;
    // Select the record format implementation matching the configured file format.
    final SimpleRecordFormatBase recordFormat;
    switch (properties.getDatasetProperties().format.getValue()) {
    case AVRO:
        recordFormat = new SimpleRecordFormatAvroIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    case CSV:
        recordFormat = new SimpleRecordFormatCsvIO(doAs, path, overwrite, limit,
                properties.getDatasetProperties().getRecordDelimiter(),
                properties.getDatasetProperties().getFieldDelimiter(), mergeOutput);
        break;
    case PARQUET:
        recordFormat = new SimpleRecordFormatParquetIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    default:
        throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().format.getValue());
    }
    // Push the S3 credentials/endpoint settings into the Hadoop configuration used by the format.
    S3Connection.setS3Configuration(recordFormat.getExtraHadoopConfiguration(), properties.getDatasetProperties());
    return recordFormat.write(in);
}
Usage examples of org.talend.components.simplefileio.runtime.ugi.UgiDoAs in the Talend "components" project.
From class SimpleFileIOOutputRuntime, method expand:
@Override
public PDone expand(PCollection<IndexedRecord> in) {
    // Controls the access security on the cluster.
    UgiDoAs doAs = SimpleFileIODatasetRuntime.getReadWriteUgiDoAs(properties.getDatasetProperties(), UgiExceptionHandler.AccessType.Write);
    String path = properties.getDatasetProperties().path.getValue();
    boolean overwrite = properties.overwrite.getValue();
    boolean mergeOutput = properties.mergeOutput.getValue();
    // A row limit is meaningless when writing.
    final int limit = -1;
    // Select the record format implementation matching the configured file format.
    final SimpleRecordFormat recordFormat;
    switch (properties.getDatasetProperties().format.getValue()) {
    case AVRO:
        recordFormat = new SimpleRecordFormatAvroIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    case CSV:
        recordFormat = new SimpleRecordFormatCsvIO(doAs, path, overwrite, limit,
                properties.getDatasetProperties().getRecordDelimiter(),
                properties.getDatasetProperties().getFieldDelimiter(), mergeOutput);
        break;
    case PARQUET:
        recordFormat = new SimpleRecordFormatParquetIO(doAs, path, overwrite, limit, mergeOutput);
        break;
    default:
        throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().format.getValue());
    }
    try {
        return recordFormat.write(in);
    } catch (IllegalStateException rte) {
        // Unable to overwrite exceptions are handled here.
        if (rte.getMessage().startsWith("Output path") && rte.getMessage().endsWith("already exists")) {
            throw SimpleFileIOErrorCode.createOutputAlreadyExistsException(rte, path);
        } else {
            throw rte;
        }
    }
}
End of aggregated usage examples.