Search in sources:

Example 1 with ExtractedTextCache

Use of org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache in the Apache Archiva project.

Class RepositoryFactory, method createRepository.

/**
 * Creates the JCR repository on top of Oak and wires in a Lucene index.
 *
 * <p>The node store is chosen from {@code storeType}: a segment node store built on a file
 * store at {@code repositoryPath}, or Oak's default store when the in-memory type is
 * selected. A repository initializer then registers a Lucene index definition named
 * {@code lucene} with one index rule per Archiva node type, and the Lucene
 * editor/index providers plus a local index observer are attached before the repository
 * is created.
 *
 * @return the newly created repository
 * @throws IllegalArgumentException if {@code storeType} is neither of the known store types
 * @throws IOException if the temporary index directory cannot be created (it is also
 *         declared for the file-store build path)
 * @throws InvalidFileStoreVersionException declared for the file-store build path
 */
public Repository createRepository() throws IOException, InvalidFileStoreVersionException {
    createExecutor();
    if (SEGMENT_FILE_TYPE == storeType) {
        fileStore = FileStoreBuilder.fileStoreBuilder(repositoryPath.toFile()).build();
        nodeStore = SegmentNodeStoreBuilders.builder(fileStore)
            .withStatisticsProvider(StatisticsProvider.NOOP)
            .build();
    } else if (IN_MEMORY_TYPE == storeType) {
        // A null node store makes the code below fall back to the no-arg Oak constructor.
        nodeStore = null;
    } else {
        throw new IllegalArgumentException("Store type " + storeType + " not recognized");
    }
    Oak oak = nodeStore == null ? new Oak() : new Oak(nodeStore);
    oak.with(new RepositoryInitializer() {

        @Override
        public void initialize(@Nonnull NodeBuilder root) {
            log.info("Creating index ");
            NodeBuilder lucene = IndexUtils.getOrCreateOakIndex(root).child("lucene");
            lucene.setProperty(JcrConstants.JCR_PRIMARYTYPE, "oak:QueryIndexDefinition", Type.NAME);
            lucene.setProperty("compatVersion", 2);
            lucene.setProperty("type", "lucene");
            lucene.setProperty(INCLUDE_PROPERTY_TYPES, ImmutableSet.of("String"), Type.STRINGS);
            lucene.setProperty("async", ImmutableSet.of("async", "sync"), Type.STRINGS);

            // Single source of truth for the indexed node types: the same ordered set
            // feeds the ":childOrder" property and the per-type rule creation below.
            ImmutableSet<String> ruleNames = ImmutableSet.of(
                "archiva:projectVersion",
                "archiva:artifact",
                "archiva:facet",
                "archiva:namespace",
                "archiva:project");
            NodeBuilder rules = lucene.child("indexRules")
                .setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED, Type.NAME);
            rules.setProperty(":childOrder", ruleNames, Type.STRINGS);
            for (String ruleName : ruleNames) {
                addAllPropsRule(rules, ruleName);
            }

            // NOTE(review): no child named "myIndex" is created in this method; these two
            // statements look like leftover debug output - confirm whether they are still needed.
            log.info("Index: {} myIndex {}", lucene, lucene.getChildNode("myIndex"));
            log.info("myIndex {}", lucene.getChildNode("myIndex").getProperties());
        }

        /**
         * Adds an index rule for one node type containing a single property definition,
         * {@code allProps}, whose name is the regular expression {@code .*}.
         *
         * @param rules the {@code indexRules} node builder the rule is added to
         * @param nodeTypeName name of the rule's child node, e.g. {@code archiva:artifact}
         */
        private void addAllPropsRule(NodeBuilder rules, String nodeTypeName) {
            // NOTE(review): "indexNodeName" is written on the "properties" node, exactly as
            // the original code did; Oak's documentation lists it as an index-rule setting,
            // so confirm the intended placement before moving it.
            NodeBuilder properties = rules.child(nodeTypeName).child("properties")
                .setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED, Type.NAME)
                .setProperty(":childOrder", ImmutableSet.of("allProps"), Type.STRINGS)
                .setProperty("indexNodeName", true);
            NodeBuilder allProps = properties.child("allProps")
                .setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED, Type.NAME);
            allProps.setProperty("name", ".*");
            allProps.setProperty("isRegexp", true);
            allProps.setProperty("nodeScopeIndex", true);
            allProps.setProperty("index", true);
            allProps.setProperty("analyzed", true);
        }
    });

    StatisticsProvider statsProvider = StatisticsProvider.NOOP;
    // Queue capacity can be overridden with the "queueSize" system property.
    int queueSize = Integer.getInteger("queueSize", 10000);
    // NOTE(review): this temporary directory is not removed anywhere in this method.
    Path indexDir = Files.createTempDirectory("archiva_index");
    log.info("Queue Index {}", indexDir);

    IndexCopier indexCopier = new IndexCopier(executorService, indexDir.toFile(), true);
    NRTIndexFactory nrtIndexFactory = new NRTIndexFactory(indexCopier, statsProvider);
    MountInfoProvider mountInfoProvider = Mounts.defaultMountInfoProvider();
    IndexTracker tracker = new IndexTracker(
        new DefaultIndexReaderFactory(mountInfoProvider, indexCopier), nrtIndexFactory);
    DocumentQueue queue = new DocumentQueue(queueSize, tracker, executorService, statsProvider);
    LocalIndexObserver localIndexObserver = new LocalIndexObserver(queue, statsProvider);
    LuceneIndexProvider provider = new LuceneIndexProvider(tracker);

    LuceneIndexEditorProvider editorProvider = new LuceneIndexEditorProvider(
        null, tracker, new ExtractedTextCache(0, 0), null, mountInfoProvider);
    editorProvider.setIndexingQueue(queue);

    log.info("Oak: {} with nodeStore {}", oak, nodeStore);
    // The Lucene provider is registered twice on purpose: once as Observer and once as
    // QueryIndexProvider, matching the two casts.
    Jcr jcr = new Jcr(oak)
        .with(editorProvider)
        .with((Observer) provider)
        .with(localIndexObserver)
        .with((QueryIndexProvider) provider);

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    Repository r = jcr.createRepository();
    stopWatch.stop();
    log.info("time to create jcr repository: {} ms", stopWatch.getTime());
    return r;
}
Also used : Path(java.nio.file.Path) IndexTracker(org.apache.jackrabbit.oak.plugins.index.lucene.IndexTracker) DocumentQueue(org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.DocumentQueue) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) IndexCopier(org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier) StatisticsProvider(org.apache.jackrabbit.oak.stats.StatisticsProvider) StopWatch(org.apache.commons.lang.time.StopWatch) ExtractedTextCache(org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache) Repository(javax.jcr.Repository) LuceneIndexEditorProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider) LocalIndexObserver(org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.LocalIndexObserver) Oak(org.apache.jackrabbit.oak.Oak) NRTIndexFactory(org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.NRTIndexFactory) Jcr(org.apache.jackrabbit.oak.jcr.Jcr) DefaultIndexReaderFactory(org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReaderFactory) LuceneIndexProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider) RepositoryInitializer(org.apache.jackrabbit.oak.spi.lifecycle.RepositoryInitializer) MountInfoProvider(org.apache.jackrabbit.oak.spi.mount.MountInfoProvider)

Example 2 with ExtractedTextCache

Use of org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache in the Apache Jackrabbit Oak project.

Class ActiveDeletedBlobCollectionIT, method createRepository.

/**
 * Builds the content repository used by this test: a Mongo-backed document node store
 * with a counting blob store, plus Lucene index/editor providers that share one index
 * copier and report deleted blobs to the active-deleted-blob collector.
 *
 * @return the content repository created from the configured Oak instance
 */
@Override
protected ContentRepository createRepository() {
    File deletedBlobsDir = new File(blobCollectionRoot.getRoot(), "deleted-blobs");
    adbc = new ActiveDeletedBlobCollectorImpl(clock, deletedBlobsDir, executorService);

    // Editor provider and index provider are built around the same copier instance.
    IndexCopier indexCopier = createIndexCopier();
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);
    editorProvider = new LuceneIndexEditorProvider(
        indexCopier, null, textCache, null, Mounts.defaultMountInfoProvider(), adbc);
    provider = new LuceneIndexProvider(indexCopier);

    // Start from an empty database.
    mongoConnection = connectionFactory.getConnection();
    MongoUtils.dropCollections(mongoConnection.getDatabase());

    // Either way the concrete blob store is wrapped in a CountingBlobStore.
    if (dataStoreType != DataStoreType.WITHOUT_FDS) {
        FileDataStore fileDataStore = new FileDataStore();
        fileDataStore.init(fileDataStoreRoot.getRoot().getAbsolutePath());
        DataStoreBlobStore dataStoreBlobs = new DataStoreBlobStore(fileDataStore);
        dataStoreBlobs.setBlockSize(128);
        this.blobStore = new CountingBlobStore(dataStoreBlobs);
    } else {
        MongoBlobStore mongoBlobs = new MongoBlobStore(mongoConnection.getDatabase());
        mongoBlobs.setBlockSize(128);
        mongoBlobs.setBlockSizeMin(48);
        this.blobStore = new CountingBlobStore(mongoBlobs);
    }

    nodeStore = new DocumentMK.Builder()
        .setMongoDB(mongoConnection.getMongoClient(), mongoConnection.getDBName())
        .setBlobStore(this.blobStore)
        .getNodeStore();
    asyncIndexUpdate = new AsyncIndexUpdate("async", nodeStore, editorProvider);

    // The Lucene provider is registered both as query index provider and as observer.
    Oak oak = new Oak(nodeStore)
        .with(new InitialContent())
        .with(new OpenSecurityProvider());
    oak.with((QueryIndexProvider) provider)
        .with((Observer) provider)
        .with(editorProvider);
    return oak.createContentRepository();
}
Also used : MongoBlobStore(org.apache.jackrabbit.oak.plugins.document.mongo.MongoBlobStore) ActiveDeletedBlobCollectorImpl(org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollectorImpl) AsyncIndexUpdate(org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate) OpenSecurityProvider(org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider) IndexCopier(org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier) QueryIndexProvider(org.apache.jackrabbit.oak.spi.query.QueryIndexProvider) ExtractedTextCache(org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache) InitialContent(org.apache.jackrabbit.oak.InitialContent) LuceneIndexEditorProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider) Oak(org.apache.jackrabbit.oak.Oak) LuceneIndexProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider) File(java.io.File) FileDataStore(org.apache.jackrabbit.core.data.FileDataStore) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)

Example 3 with ExtractedTextCache

Use of org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache in the Apache Jackrabbit Oak project.

Class ActiveDeletedBlobSyncTrackerTest, method createRepository.

/**
 * Builds the content repository used by this test: a segment node store whose file
 * store writes blobs to a counting, file-backed blob store with a blob id tracker
 * attached, plus Lucene index/editor providers sharing one index copier.
 *
 * @return the content repository created from the configured Oak instance
 * @throws RuntimeException wrapping any exception raised during set-up
 */
@Override
protected ContentRepository createRepository() {
    try {
        // Make sure the collector's directory exists before handing it over.
        File deletedBlobsDir = new File(blobCollectionRoot.getRoot(), "deleted-blobs");
        deletedBlobsDir.mkdirs();
        adbc = new ActiveDeletedBlobCollectorImpl(clock, deletedBlobsDir, executorService);

        IndexCopier indexCopier = createIndexCopier();
        ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);
        editorProvider = new LuceneIndexEditorProvider(
            indexCopier, null, textCache, null, Mounts.defaultMountInfoProvider(), adbc);
        provider = new LuceneIndexProvider(indexCopier);

        OakFileDataStore dataStore = new OakFileDataStore();
        dataStore.setMinRecordLength(10);
        dataStore.init(fileDataStoreRoot.getRoot().getAbsolutePath());
        DataStoreBlobStore dataStoreBlobs = new DataStoreBlobStore(dataStore);
        this.blobStore = new AbstractActiveDeletedBlobTest.CountingBlobStore(dataStoreBlobs);

        FileStore segmentStore = FileStoreBuilder.fileStoreBuilder(temporaryFolder.getRoot())
            .withMemoryMapping(false)
            .withBlobStore(blobStore)
            .build();
        nodeStore = SegmentNodeStoreBuilders.builder(segmentStore).build();

        // The tracker is attached to the counting wrapper but is given the underlying
        // data store blob store.
        BlobTrackingStore trackingStore = (BlobTrackingStore) blobStore;
        String trackerRoot = blobTrackerRoot.getRoot().getAbsolutePath();
        BlobIdTracker idTracker =
            new BlobIdTracker(trackerRoot, getOrCreateId(nodeStore), 600, dataStoreBlobs);
        trackingStore.addTracker(idTracker);

        // set the blob store to skip writing blobs through the node store
        editorProvider.setBlobStore(blobStore);
        asyncIndexUpdate = new AsyncIndexUpdate("async", nodeStore, editorProvider);

        // The Lucene provider is registered both as query index provider and as observer.
        Oak oak = new Oak(nodeStore)
            .with(new InitialContent())
            .with(new OpenSecurityProvider());
        oak.with((QueryIndexProvider) provider)
            .with((Observer) provider)
            .with(editorProvider);
        return oak.createContentRepository();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : OakFileDataStore(org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore) ActiveDeletedBlobCollectorImpl(org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollectorImpl) AsyncIndexUpdate(org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate) OpenSecurityProvider(org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider) IndexCopier(org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier) IOException(java.io.IOException) QueryIndexProvider(org.apache.jackrabbit.oak.spi.query.QueryIndexProvider) ExtractedTextCache(org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache) InitialContent(org.apache.jackrabbit.oak.InitialContent) FileStore(org.apache.jackrabbit.oak.segment.file.FileStore) BlobIdTracker(org.apache.jackrabbit.oak.plugins.blob.datastore.BlobIdTracker) LuceneIndexEditorProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider) BlobTrackingStore(org.apache.jackrabbit.oak.plugins.blob.BlobTrackingStore) Oak(org.apache.jackrabbit.oak.Oak) LuceneIndexProvider(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider) File(java.io.File) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)

Aggregations

Oak (org.apache.jackrabbit.oak.Oak)3 ExtractedTextCache (org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache)3 IndexCopier (org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier)3 LuceneIndexEditorProvider (org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider)3 LuceneIndexProvider (org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider)3 File (java.io.File)2 InitialContent (org.apache.jackrabbit.oak.InitialContent)2 DataStoreBlobStore (org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)2 AsyncIndexUpdate (org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate)2 ActiveDeletedBlobCollectorImpl (org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollectorImpl)2 QueryIndexProvider (org.apache.jackrabbit.oak.spi.query.QueryIndexProvider)2 OpenSecurityProvider (org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider)2 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 Repository (javax.jcr.Repository)1 StopWatch (org.apache.commons.lang.time.StopWatch)1 FileDataStore (org.apache.jackrabbit.core.data.FileDataStore)1 Jcr (org.apache.jackrabbit.oak.jcr.Jcr)1 BlobTrackingStore (org.apache.jackrabbit.oak.plugins.blob.BlobTrackingStore)1 BlobIdTracker (org.apache.jackrabbit.oak.plugins.blob.datastore.BlobIdTracker)1