use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class SftpFsHelper method connect.
/**
 * Opens an SFTP session to the configured host as the configured user.
 *
 * <p>All connection settings are read from the job state. Authentication is attempted with
 * {@code publickey} first and {@code password} second:
 * <ul>
 *   <li>if a private key is configured, it is offered to each {@link IdentityStrategy} in turn
 *       (local file, distributed cache, HDFS) until one of them reports success;</li>
 *   <li>if a password is configured, it is set on the session.</li>
 * </ul>
 *
 * <p>When no known-hosts path is configured, {@code StrictHostKeyChecking} is turned off. An HTTP proxy is
 * used only when a proxy host is configured together with a non-negative proxy port.
 *
 * @throws org.apache.gobblin.source.extractor.filebased.FileBasedHelperException if JSch fails to create or
 *         connect the session; any partially created session is disconnected first
 */
@Override
public void connect() throws FileBasedHelperException {
  String privateKey = PasswordManager.getInstance(this.state).readPassword(this.state.getProp(ConfigurationKeys.SOURCE_CONN_PRIVATE_KEY));
  String password = PasswordManager.getInstance(this.state).readPassword(this.state.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD));
  String knownHosts = this.state.getProp(ConfigurationKeys.SOURCE_CONN_KNOWN_HOSTS);
  String userName = this.state.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME);
  String hostName = this.state.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
  int port = this.state.getPropAsInt(ConfigurationKeys.SOURCE_CONN_PORT, ConfigurationKeys.SOURCE_CONN_DEFAULT_PORT);
  String proxyHost = this.state.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL);
  int proxyPort = this.state.getPropAsInt(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT, -1);
  JSch.setLogger(new JSchLogger());
  JSch jsch = new JSch();
  // The private-key setting is read through PasswordManager like a secret, so never echo its value to the
  // log; only report whether one was supplied.
  String privateKeyForLog = Strings.isNullOrEmpty(privateKey) ? "<not set>" : "<redacted>";
  log.info("Attempting to connect to source via SFTP with" + " privateKey: " + privateKeyForLog + " knownHosts: " + knownHosts + " userName: " + userName + " hostName: " + hostName + " port: " + port + " proxyHost: " + proxyHost + " proxyPort: " + proxyPort);
  try {
    if (!Strings.isNullOrEmpty(privateKey)) {
      List<IdentityStrategy> identityStrategies = ImmutableList.of(new LocalFileIdentityStrategy(), new DistributedCacheIdentityStrategy(), new HDFSIdentityStrategy());
      // Stop at the first strategy that reports it registered the identity with JSch.
      for (IdentityStrategy identityStrategy : identityStrategies) {
        if (identityStrategy.setIdentity(privateKey, jsch)) {
          break;
        }
      }
    }
    this.session = jsch.getSession(userName, hostName, port);
    this.session.setConfig("PreferredAuthentications", "publickey,password");
    if (Strings.isNullOrEmpty(knownHosts)) {
      log.info("Known hosts path is not set, StrictHostKeyChecking will be turned off");
      this.session.setConfig("StrictHostKeyChecking", "no");
    } else {
      jsch.setKnownHosts(knownHosts);
    }
    if (!Strings.isNullOrEmpty(password)) {
      this.session.setPassword(password);
    }
    // -1 is the "no proxy port configured" default read above.
    if (proxyHost != null && proxyPort >= 0) {
      this.session.setProxy(new ProxyHTTP(proxyHost, proxyPort));
    }
    UserInfo ui = new MyUserInfo();
    this.session.setUserInfo(ui);
    this.session.setDaemonThread(true);
    this.session.connect();
    log.info("Finished connecting to source");
  } catch (JSchException e) {
    // Release whatever was set up before the failure so a half-open session is not left behind.
    if (this.session != null) {
      this.session.disconnect();
    }
    log.error(e.getMessage(), e);
    throw new FileBasedHelperException("Cannot connect to SFTP source", e);
  }
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class SftpFsHelper method getFileMTime.
/**
 * Returns the modification time reported by {@code lstat} for the given remote path.
 *
 * <p>The lookup runs on a channel obtained from {@code getSftpChannel()}, which is disconnected before this
 * method returns, whether or not the lookup succeeded.
 *
 * @param filePath remote path whose modification time is requested
 * @return the value of {@code getMTime()} from the file's attributes, widened to {@code long}
 * @throws FileBasedHelperException if the SFTP operation fails
 */
@Override
public long getFileMTime(String filePath) throws FileBasedHelperException {
  ChannelSftp sftp = null;
  try {
    sftp = getSftpChannel();
    // The attribute value is an int; the return statement widens it to long.
    return sftp.lstat(filePath).getMTime();
  } catch (SftpException sftpFailure) {
    String message = String.format("Failed to get modified timestamp for file at path %s due to error %s", filePath, sftpFailure.getMessage());
    throw new FileBasedHelperException(message, sftpFailure);
  } finally {
    if (sftp != null) {
      sftp.disconnect();
    }
  }
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class SftpLightWeightFileSystem method initialize.
/**
 * Initializes this file system and connects its SFTP helper.
 *
 * <p>After the superclass initialization, a {@link SftpFsHelper} is built from the state derived from
 * {@code conf}, stored in {@code fsHelper}, and then connected.
 *
 * @param name the URI passed through to the superclass
 * @param conf the Hadoop configuration from which the helper's state is derived
 * @throws IOException if the helper fails to connect; the original {@link FileBasedHelperException} is the cause
 */
@Override
public void initialize(URI name, Configuration conf) throws IOException {
  super.initialize(name, conf);
  SftpFsHelper sftpHelper = new SftpFsHelper(HadoopUtils.getStateFromConf(conf));
  // The field is assigned before connecting, so it is set even when the connection attempt fails.
  this.fsHelper = sftpHelper;
  try {
    sftpHelper.connect();
  } catch (FileBasedHelperException connectFailure) {
    throw new IOException(connectFailure);
  }
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class RegexBasedPartitionedRetriever method getFilesToProcess.
/**
 * Collects the files found under the outer directories returned by {@code getOuterDirectories} for the
 * window between {@code minWatermark} and "now minus lead time".
 *
 * <p>Each returned {@link FileInfo} carries the file's path and length, plus the watermark of the outer
 * directory it was found in.
 *
 * @param minWatermark lower watermark bound passed to {@code getOuterDirectories}
 * @param maxFilesToReturn threshold after which no further directories are listed
 * @return the collected files; may hold more than {@code maxFilesToReturn} entries (see the note in the loop)
 * @throws IOException if listing fails, or if the helper cannot connect
 */
@Override
public List<FileInfo> getFilesToProcess(long minWatermark, int maxFilesToReturn) throws IOException {
  // This implementation assumes snapshots are always in the root directory and the number of them
  // remains relatively small
  final long watermarkCeiling = new DateTime().minus(leadTime).getMillis();
  try {
    this.helper.connect();
    final FileSystem fileSystem = helper.getFileSystem();
    final List<FileInfo> collected = new ArrayList<>();
    for (FileInfo directory : getOuterDirectories(fileSystem, minWatermark, watermarkCeiling)) {
      final Path directoryPath = new Path(directory.getFilePath());
      for (FileStatus status : fileSystem.listStatus(directoryPath, getFileFilter())) {
        collected.add(new FileInfo(status.getPath().toString(), status.getLen(), directory.getWatermarkMsSinceEpoch()));
      }
      // The limit is checked only after a whole directory has been added, and with a strict '>', so a
      // directory is never split and the result can exceed maxFilesToReturn.
      // NOTE(review): confirm that continuing when size == maxFilesToReturn is intended.
      if (collected.size() > maxFilesToReturn) {
        break;
      }
    }
    return collected;
  } catch (FileBasedHelperException connectFailure) {
    throw new IOException("Error initializing Hadoop connection", connectFailure);
  }
}
use of org.apache.gobblin.source.extractor.filebased.FileBasedHelperException in project incubator-gobblin by apache.
the class HadoopFsHelper method getFileStream.
/**
 * Returns an {@link InputStream} to the specified file.
 * <p>
 * If a compression codec is registered for the path (e.g. gzip), the returned stream is the codec's
 * decompressing stream wrapped around the raw file stream; otherwise the raw file stream is returned.
 * </p>
 * <p>
 * Note: It is the caller's responsibility to close the returned {@link InputStream}. If this method fails
 * after the file has been opened, the raw stream is closed before the exception is propagated.
 * </p>
 *
 * @param path The path to the file to open.
 * @return An {@link InputStream} for the specified file.
 * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file.
 */
@Override
public InputStream getFileStream(String path) throws FileBasedHelperException {
  try {
    Path p = new Path(path);
    InputStream in = this.getFileSystem().open(p);
    try {
      // Account for compressed files (e.g. gzip).
      // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala
      CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf());
      CompressionCodec codec = factory.getCodec(p);
      return (codec == null) ? in : codec.createInputStream(in);
    } catch (IOException | RuntimeException e) {
      // The caller never receives a stream on this path, so close the raw one here to avoid leaking it.
      try {
        in.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  } catch (IOException e) {
    throw new FileBasedHelperException("Cannot open file " + path + " due to " + e.getMessage(), e);
  }
}
Aggregations