Use of org.apache.jackrabbit.core.data.DataStore in project jackrabbit-oak by Apache.
The class TextExtractorMain, method main:
public static void main(String[] args) throws Exception {
    Closer closer = Closer.create();
    String h = "tika [extract|report|generate]\n" +
            "\n" +
            "report : Generates a summary report related to binary data\n" +
            "extract : Performs the text extraction\n" +
            "generate : Generates the csv data file based on configured NodeStore/BlobStore";
    try {
        OptionParser parser = new OptionParser();
        OptionSpec<?> help = parser.acceptsAll(asList("h", "?", "help"), "show help").forHelp();
        OptionSpec<String> nodeStoreSpec = parser.accepts("nodestore",
                "NodeStore detail /path/to/oak/repository | mongodb://host:port/database")
                .withRequiredArg().ofType(String.class);
        OptionSpec<String> pathSpec = parser.accepts("path",
                "Path in repository under which the binaries would be searched")
                .withRequiredArg().ofType(String.class);
        OptionSpec<File> dataFileSpec = parser.accepts("data-file",
                "Data file in csv format containing the binary metadata")
                .withRequiredArg().ofType(File.class);
        OptionSpec<File> tikaConfigSpec = parser.accepts("tika-config",
                "Tika config file path")
                .withRequiredArg().ofType(File.class);
        OptionSpec<File> fdsDirSpec = parser.accepts("fds-path",
                "Path of directory used by FileDataStore")
                .withRequiredArg().ofType(File.class);
        OptionSpec<File> s3ConfigSpec = parser.accepts("s3-config-path",
                "Path of properties file containing config for S3DataStore")
                .withRequiredArg().ofType(File.class);
        OptionSpec<File> storeDirSpec = parser.accepts("store-path",
                "Path of directory used to store extracted text content")
                .withRequiredArg().ofType(File.class);
        OptionSpec<Integer> poolSize = parser.accepts("pool-size",
                "Size of the thread pool used to perform text extraction. Defaults " +
                "to number of cores on the system")
                .withRequiredArg().ofType(Integer.class);
        OptionSpec<String> nonOption = parser.nonOptions(h);
        OptionSet options = parser.parse(args);
        List<String> nonOptions = nonOption.values(options);

        if (options.has(help)) {
            parser.printHelpOn(System.out);
            System.exit(0);
        }
        if (nonOptions.isEmpty()) {
            parser.printHelpOn(System.err);
            System.exit(1);
        }

        boolean report = nonOptions.contains("report");
        boolean extract = nonOptions.contains("extract");
        boolean generate = nonOptions.contains("generate");
        File dataFile = null;
        File storeDir = null;
        File tikaConfigFile = null;
        BlobStore blobStore = null;
        BinaryResourceProvider binaryResourceProvider = null;
        BinaryStats stats = null;
        String path = "/";

        if (options.has(tikaConfigSpec)) {
            tikaConfigFile = tikaConfigSpec.value(options);
            checkArgument(tikaConfigFile.exists(), "Tika config file %s does not exist",
                    tikaConfigFile.getAbsolutePath());
        }

        if (options.has(storeDirSpec)) {
            storeDir = storeDirSpec.value(options);
            if (storeDir.exists()) {
                checkArgument(storeDir.isDirectory(), "Path [%s] specified for storing extracted " +
                        "text content '%s' is not a directory",
                        storeDir.getAbsolutePath(), storeDirSpec.options());
            }
        }

        if (options.has(fdsDirSpec)) {
            // wrap a local FileDataStore in a DataStoreBlobStore
            File fdsDir = fdsDirSpec.value(options);
            checkArgument(fdsDir.exists(), "FileDataStore %s does not exist", fdsDir.getAbsolutePath());
            FileDataStore fds = new FileDataStore();
            fds.setPath(fdsDir.getAbsolutePath());
            fds.init(null);
            blobStore = new DataStoreBlobStore(fds);
        }

        if (options.has(s3ConfigSpec)) {
            File s3Config = s3ConfigSpec.value(options);
            checkArgument(s3Config.exists() && s3Config.canRead(),
                    "S3DataStore config cannot be read from [%s]", s3Config.getAbsolutePath());
            Properties props = loadProperties(s3Config);
            log.info("Loaded properties for S3DataStore from {}", s3Config.getAbsolutePath());
            String pathProp = "path";
            String repoPath = props.getProperty(pathProp);
            checkNotNull(repoPath, "Missing required property [%s] from S3DataStore config loaded from [%s]",
                    pathProp, s3Config);

            // Check if the 'secret' key is defined. It should be non-null for references
            // to be generated. As the references are transient, we can use any random
            // value if it is not specified.
            String secretConfig = "secret";
            if (props.getProperty(secretConfig) == null) {
                props.setProperty(secretConfig, UUID.randomUUID().toString());
            }

            log.info("Using {} for S3DataStore ", repoPath);
            DataStore ds = createS3DataStore(props);
            PropertiesUtil.populate(ds, toMap(props), false);
            ds.init(pathProp);
            blobStore = new DataStoreBlobStore(ds);
            closer.register(asCloseable(ds));
        }

        if (options.has(dataFileSpec)) {
            dataFile = dataFileSpec.value(options);
        }
        checkNotNull(dataFile, "Data file not configured with %s", dataFileSpec);

        if (report || extract) {
            checkArgument(dataFile.exists(), "Data file %s does not exist", dataFile.getAbsolutePath());
            binaryResourceProvider = new CSVFileBinaryResourceProvider(dataFile, blobStore);
            if (binaryResourceProvider instanceof Closeable) {
                closer.register((Closeable) binaryResourceProvider);
            }
            stats = new BinaryStats(tikaConfigFile, binaryResourceProvider);
            String summary = stats.getSummary();
            log.info(summary);
        }

        if (generate) {
            String src = nodeStoreSpec.value(options);
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " +
                    "must be specified via %s", fdsDirSpec.options());
            checkNotNull(dataFile, "Data file path not provided");
            NodeStore nodeStore = bootStrapNodeStore(src, blobStore, closer);
            BinaryResourceProvider brp = new NodeStoreBinaryResourceProvider(nodeStore, blobStore);
            CSVFileGenerator generator = new CSVFileGenerator(dataFile);
            generator.generate(brp.getBinaries(path));
        }

        if (extract) {
            checkNotNull(storeDir, "Directory to store extracted text content " +
                    "must be specified via %s", storeDirSpec.options());
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " +
                    "must be specified via %s", fdsDirSpec.options());
            DataStoreTextWriter writer = new DataStoreTextWriter(storeDir, false);
            TextExtractor extractor = new TextExtractor(writer);
            if (options.has(poolSize)) {
                extractor.setThreadPoolSize(poolSize.value(options));
            }
            if (tikaConfigFile != null) {
                extractor.setTikaConfig(tikaConfigFile);
            }
            if (options.has(pathSpec)) {
                path = pathSpec.value(options);
            }
            closer.register(writer);
            closer.register(extractor);
            extractor.setStats(stats);
            log.info("Using path {}", path);
            extractor.extract(binaryResourceProvider.getBinaries(path));
            extractor.close();
            writer.close();
        }
    } catch (Throwable e) {
        throw closer.rethrow(e);
    } finally {
        closer.close();
    }
}
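Taken together: generate writes the CSV of binary metadata, while report and extract consume it, and the blob store comes from either --fds-path or --s3-config-path. A plausible oak-run invocation, using only flags defined above (jar name assumed), would be java -jar oak-run.jar tika --fds-path /path/to/datastore --data-file binary-stats.csv --store-path /path/to/store extract. The whole method is wrapped in Guava's Closer so every registered resource is released even when an exception escapes. A minimal, self-contained sketch of that pattern (file name hypothetical):

import com.google.common.io.Closer;
import java.io.BufferedReader;
import java.io.FileReader;

public class CloserSketch {
    public static void main(String[] args) throws Exception {
        Closer closer = Closer.create();
        try {
            // register() returns its argument, so a resource can be created and tracked in one step
            BufferedReader reader = closer.register(new BufferedReader(new FileReader("data.csv")));
            System.out.println(reader.readLine());
        } catch (Throwable t) {
            // rethrow() records the primary failure so that later close() failures
            // are suppressed instead of masking it
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}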
Use of org.apache.jackrabbit.core.data.DataStore in project jackrabbit-oak by Apache.
The class Utils, method bootstrapDataStore:
@Nullable
public static GarbageCollectableBlobStore bootstrapDataStore(String[] args, Closer closer)
        throws IOException, RepositoryException {
    OptionParser parser = new OptionParser();
    parser.allowsUnrecognizedOptions();
    ArgumentAcceptingOptionSpec<String> s3dsConfig = parser.accepts("s3ds", "S3DataStore config")
            .withRequiredArg().ofType(String.class);
    ArgumentAcceptingOptionSpec<String> fdsConfig = parser.accepts("fds", "FileDataStore config")
            .withRequiredArg().ofType(String.class);
    ArgumentAcceptingOptionSpec<String> azureBlobDSConfig = parser.accepts("azureblobds", "AzureBlobStorageDataStore config")
            .withRequiredArg().ofType(String.class);
    OptionSet options = parser.parse(args);

    if (!options.has(s3dsConfig) && !options.has(fdsConfig) && !options.has(azureBlobDSConfig)) {
        return null;
    }

    DataStore delegate;
    if (options.has(s3dsConfig)) {
        SharedS3DataStore s3ds = new SharedS3DataStore();
        String cfgPath = s3dsConfig.value(options);
        Properties props = loadAndTransformProps(cfgPath);
        s3ds.setProperties(props);
        File homeDir = Files.createTempDir();
        closer.register(asCloseable(homeDir));
        s3ds.init(homeDir.getAbsolutePath());
        delegate = s3ds;
    } else if (options.has(azureBlobDSConfig)) {
        AzureDataStore azureds = new AzureDataStore();
        String cfgPath = azureBlobDSConfig.value(options);
        Properties props = loadAndTransformProps(cfgPath);
        azureds.setProperties(props);
        File homeDir = Files.createTempDir();
        azureds.init(homeDir.getAbsolutePath());
        closer.register(asCloseable(homeDir));
        delegate = azureds;
    } else {
        delegate = new OakFileDataStore();
        String cfgPath = fdsConfig.value(options);
        Properties props = loadAndTransformProps(cfgPath);
        populate(delegate, asMap(props), true);
        delegate.init(null);
    }
    DataStoreBlobStore blobStore = new DataStoreBlobStore(delegate);
    closer.register(Utils.asCloseable(blobStore));
    return blobStore;
}
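Because the parser calls allowsUnrecognizedOptions(), bootstrapDataStore can be handed the complete argument list of a larger command, and it returns null when none of --s3ds, --fds, or --azureblobds is present. A hypothetical caller (config path assumed, Utils being the class from the snippet above):

import com.google.common.io.Closer;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;

public class BootstrapExample {
    public static void main(String[] args) throws Exception {
        Closer closer = Closer.create();
        try {
            // "--fds" expects a properties file describing the FileDataStore
            GarbageCollectableBlobStore blobStore = Utils.bootstrapDataStore(
                    new String[] {"--fds", "/etc/oak/datastore.properties"}, closer);
            if (blobStore == null) {
                System.out.println("No data store flag given; nothing to bootstrap");
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}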
Use of org.apache.jackrabbit.core.data.DataStore in project jackrabbit-oak by Apache.
The class BlobStoreFixtureProvider, method create:
@CheckForNull
public static BlobStoreFixture create(Options options) throws Exception {
    BlobStoreOptions bsopts = options.getOptionBean(BlobStoreOptions.class);
    if (bsopts == null) {
        return null;
    }
    Type bsType = bsopts.getBlobStoreType();
    if (bsType == Type.NONE) {
        return null;
    }

    Closer closer = Closer.create();
    DataStore delegate;
    if (bsType == Type.S3) {
        SharedS3DataStore s3ds = new SharedS3DataStore();
        Properties props = loadAndTransformProps(bsopts.getS3ConfigPath());
        s3ds.setProperties(props);
        File homeDir = Files.createTempDir();
        closer.register(asCloseable(homeDir));
        s3ds.init(homeDir.getAbsolutePath());
        delegate = s3ds;
    } else if (bsType == Type.AZURE) {
        AzureDataStore azureds = new AzureDataStore();
        String cfgPath = bsopts.getAzureConfigPath();
        Properties props = loadAndTransformProps(cfgPath);
        azureds.setProperties(props);
        File homeDir = Files.createTempDir();
        azureds.init(homeDir.getAbsolutePath());
        closer.register(asCloseable(homeDir));
        delegate = azureds;
    } else if (bsType == Type.FAKE) {
        FileDataStore fakeDs = new DummyDataStore();
        fakeDs.setPath(bsopts.getFakeDataStorePath());
        fakeDs.init(null);
        delegate = fakeDs;
    } else {
        FileDataStore fds = new OakFileDataStore();
        delegate = fds;
        if (bsopts.getFDSPath() != null) {
            fds.setPath(bsopts.getFDSPath());
        } else {
            String cfgPath = bsopts.getFDSConfigPath();
            Properties props = loadAndTransformProps(cfgPath);
            populate(delegate, asMap(props), true);
        }
        delegate.init(null);
    }
    DataStoreBlobStore blobStore = new DataStoreBlobStore(delegate);
    return new DataStoreFixture(blobStore, closer, !options.getCommonOpts().isReadWrite());
}
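All four branches end the same way: the concrete DataStore delegate is wrapped in a DataStoreBlobStore, which adapts Jackrabbit's DataStore SPI to Oak's BlobStore SPI. The simplest standalone version of that wiring, using only classes that appear in the snippets above (directory path assumed):

import org.apache.jackrabbit.core.data.FileDataStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;

public class FileBlobStoreExample {
    public static void main(String[] args) throws Exception {
        // FileDataStore stores each binary as a content-addressed file under this directory
        FileDataStore fds = new FileDataStore();
        fds.setPath("/var/oak/datastore"); // assumed location
        fds.init(null); // homeDir is not needed once an explicit path is set

        // the adapter that the fixture provider and TextExtractorMain both build
        DataStoreBlobStore blobStore = new DataStoreBlobStore(fds);
        System.out.println(blobStore);
    }
}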
Use of org.apache.jackrabbit.core.data.DataStore in project jackrabbit by Apache.
The class RepositoryImpl, method doShutdown:
/**
* Protected method that performs the actual shutdown after the shutdown
* lock has been acquired by the {@link #shutdown()} method.
*/
protected synchronized void doShutdown() {
    log.info("Shutting down repository...");
    // stop optional cluster node
    ClusterNode clusterNode = context.getClusterNode();
    if (clusterNode != null) {
        clusterNode.stop();
    }
    if (securityMgr != null) {
        securityMgr.close();
    }
    // close active user sessions
    // (copy sessions to array to avoid ConcurrentModificationException;
    // manually copy entries rather than calling ReferenceMap#toArray() in
    // order to work around http://issues.apache.org/bugzilla/show_bug.cgi?id=25551)
    List<Session> sa;
    synchronized (activeSessions) {
        sa = new ArrayList<Session>(activeSessions.size());
        for (Session session : activeSessions.values()) {
            sa.add(session);
        }
    }
    for (Session session : sa) {
        if (session != null) {
            session.logout();
        }
    }
    // shutdown system search manager if there is one
    if (systemSearchMgr != null) {
        systemSearchMgr.close();
    }
    // shut down workspaces
    synchronized (wspInfos) {
        for (WorkspaceInfo wspInfo : wspInfos.values()) {
            wspInfo.dispose();
        }
    }
    try {
        InternalVersionManager m = context.getInternalVersionManager();
        if (m != null) {
            m.close();
        }
    } catch (Exception e) {
        log.error("Error while closing Version Manager.", e);
    }
    repDescriptors.clear();
    DataStore dataStore = context.getDataStore();
    if (dataStore != null) {
        try {
            // close the datastore
            dataStore.close();
        } catch (DataStoreException e) {
            log.error("error while closing datastore", e);
        }
    }
    try {
        // close repository file system
        context.getFileSystem().close();
    } catch (FileSystemException e) {
        log.error("error while closing repository file system", e);
    }
    try {
        nodeIdFactory.close();
    } catch (RepositoryException e) {
        log.error("error while closing node id factory", e);
    }
    // make sure this instance is not used anymore
    disposed = true;
    // wake up threads waiting on this instance's monitor (e.g. workspace janitor)
    notifyAll();
    // Shut down the executor service
    ScheduledExecutorService executor = context.getExecutor();
    executor.shutdown();
    try {
        // Wait for all remaining background threads to terminate
        if (!executor.awaitTermination(10, TimeUnit.SECONDS)) {
            log.warn("Attempting to forcibly shutdown runaway threads");
            executor.shutdownNow();
        }
    } catch (InterruptedException e) {
        log.warn("Interrupted while waiting for background threads", e);
    }
    repConfig.getConnectionFactory().close();
    // finally release repository lock
    if (repLock != null) {
        try {
            repLock.release();
        } catch (RepositoryException e) {
            log.error("failed to release the repository lock", e);
        }
    }
    log.info("Repository has been shutdown");
}
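The tail of doShutdown uses the standard two-phase ExecutorService shutdown idiom: shutdown() to stop intake, awaitTermination() for a grace period, then shutdownNow() to interrupt stragglers. In isolation (a minimal sketch, not Jackrabbit code):

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class ExecutorShutdownExample {
    public static void main(String[] args) {
        ScheduledExecutorService executor = Executors.newScheduledThreadPool(2);
        executor.submit(() -> System.out.println("background work"));

        // Phase 1: stop accepting new tasks and let queued tasks finish
        executor.shutdown();
        try {
            // Phase 2: if tasks are still running after the grace period, interrupt them
            if (!executor.awaitTermination(10, TimeUnit.SECONDS)) {
                executor.shutdownNow();
            }
        } catch (InterruptedException e) {
            // restore the interrupt flag and force shutdown
            Thread.currentThread().interrupt();
            executor.shutdownNow();
        }
    }
}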
Use of org.apache.jackrabbit.core.data.DataStore in project jackrabbit by Apache.
The class RepositoryConfigurationParser, method getDataStoreFactory:
/**
 * Parses data store configuration. Data store configuration uses the following format:
 * <pre>
 *   <DataStore class="...">
 *     <param name="..." value="...">
 *     ...
 *   </DataStore>
 * </pre>
 * It is also possible to configure a multi data store. The configuration uses the following format:
 * <pre>
 *   <DataStore class="org.apache.jackrabbit.core.data.MultiDataStore">
 *     <param name="primary" value="org.apache.jackrabbit.core.data.db.XXDataStore">
 *       <param name="..." value="...">
 *       ...
 *     </param>
 *     <param name="archive" value="org.apache.jackrabbit.core.data.db.XXDataStore">
 *       <param name="..." value="...">
 *       ...
 *     </param>
 *   </DataStore>
 * </pre>
 * <p>
 * <code>DataStore</code> is a {@link #parseBeanConfig(Element,String) bean configuration}
 * element.
 *
 * @param parent configuration element
 * @param directory the repository directory
 * @return data store factory
 * @throws ConfigurationException if the configuration is broken
 */
protected DataStoreFactory getDataStoreFactory(final Element parent, final String directory) throws ConfigurationException {
    return new DataStoreFactory() {
        public DataStore getDataStore() throws RepositoryException {
            NodeList children = parent.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                Node child = children.item(i);
                if (child.getNodeType() == Node.ELEMENT_NODE && DATA_STORE_ELEMENT.equals(child.getNodeName())) {
                    BeanConfig bc = parseBeanConfig(parent, DATA_STORE_ELEMENT);
                    bc.setValidate(false);
                    DataStore store = bc.newInstance(DataStore.class);
                    if (store instanceof MultiDataStore) {
                        DataStore primary = null;
                        DataStore archive = null;
                        NodeList subParamNodes = child.getChildNodes();
                        for (int x = 0; x < subParamNodes.getLength(); x++) {
                            Node paramNode = subParamNodes.item(x);
                            if (paramNode.getNodeType() == Node.ELEMENT_NODE
                                    && (PRIMARY_DATASTORE_ATTRIBUTE.equals(paramNode.getAttributes().getNamedItem("name").getNodeValue())
                                        || ARCHIVE_DATASTORE_ATTRIBUTE.equals(paramNode.getAttributes().getNamedItem("name").getNodeValue()))) {
                                try {
                                    // Build a synthetic <parent><DataStore .../></parent> document so the
                                    // nested configuration can be parsed by a recursive call below.
                                    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
                                    Element newParent = document.createElement("parent");
                                    document.appendChild(newParent);
                                    Element datastoreElement = document.createElement(DATA_STORE_ELEMENT);
                                    newParent.appendChild(datastoreElement);
                                    NodeList childNodes = paramNode.getChildNodes();
                                    for (int y = 0; y < childNodes.getLength(); y++) {
                                        datastoreElement.appendChild(document.importNode(childNodes.item(y), true));
                                    }
                                    // copy the param's attribute values into the class attribute of the
                                    // synthetic element (per the documented format, the param's value
                                    // attribute holds the implementation class name)
                                    NamedNodeMap attributes = paramNode.getAttributes();
                                    for (int z = 0; z < attributes.getLength(); z++) {
                                        Node item = attributes.item(z);
                                        datastoreElement.setAttribute(CLASS_ATTRIBUTE, item.getNodeValue());
                                    }
                                    DataStore subDataStore = getDataStoreFactory(newParent, directory).getDataStore();
                                    if (!MultiDataStoreAware.class.isAssignableFrom(subDataStore.getClass())) {
                                        throw new ConfigurationException("Only MultiDataStoreAware data stores can be used within a MultiDataStore.");
                                    }
                                    String type = getAttribute((Element) paramNode, NAME_ATTRIBUTE);
                                    if (PRIMARY_DATASTORE_ATTRIBUTE.equals(type)) {
                                        primary = subDataStore;
                                    } else if (ARCHIVE_DATASTORE_ATTRIBUTE.equals(type)) {
                                        archive = subDataStore;
                                    }
                                } catch (Exception e) {
                                    throw new ConfigurationException("Failed to parse the MultiDataStore element.", e);
                                }
                            }
                        }
                        if (primary == null || archive == null) {
                            throw new ConfigurationException("A MultiDataStore must have a primary and an archive data store configured.");
                        }
                        ((MultiDataStore) store).setPrimaryDataStore(primary);
                        ((MultiDataStore) store).setArchiveDataStore(archive);
                    }
                    store.init(directory);
                    return store;
                }
            }
            return null;
        }
    };
}
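For comparison with the reflective XML wiring above, here is a minimal programmatic sketch of the same primary/archive pairing. It uses the setPrimaryDataStore/setArchiveDataStore calls visible in the parser and FileDataStore as the delegate type (a MultiDataStoreAware store in jackrabbit-core); all paths are assumed:

import org.apache.jackrabbit.core.data.FileDataStore;
import org.apache.jackrabbit.core.data.MultiDataStore;

public class MultiDataStoreExample {
    public static void main(String[] args) throws Exception {
        FileDataStore primary = new FileDataStore();
        primary.setPath("/var/oak/ds-primary"); // assumed locations
        primary.init(null);

        FileDataStore archive = new FileDataStore();
        archive.setPath("/var/oak/ds-archive");
        archive.init(null);

        // the same wiring the parser performs after instantiating the MultiDataStore bean
        MultiDataStore store = new MultiDataStore();
        store.setPrimaryDataStore(primary);
        store.setArchiveDataStore(archive);
        store.init("/var/oak"); // repository directory, as passed by getDataStoreFactory
    }
}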