Search in sources :

Example 6 with CodeIndexDocument

use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.

the class CodeIndexerTest method testIndexDocumentsEmptyIssue.

// Regression test: indexing a document with an empty repo name, zero code lines and
// null contents must complete without throwing. Any exception is allowed to propagate
// so the test runner reports the real cause and stack trace instead of a bare
// assertion failure.
public void testIndexDocumentsEmptyIssue() throws IOException {
    CodeIndexDocument cid = new CodeIndexDocument("repoLocationRepoNameLocationFilename", "", "fileName", "fileLocation", "fileLocationFilename", "md5hash", "languageName", 0, null, "repoRemoteLocation", "codeOwner");
    Queue<CodeIndexDocument> queue = new ConcurrentArrayQueue<>();
    queue.add(cid);
    Singleton.getCodeIndexer().indexDocuments(queue);
}
Also used : ConcurrentArrayQueue(org.eclipse.jetty.util.ConcurrentArrayQueue) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Queue(java.util.Queue) ConcurrentArrayQueue(org.eclipse.jetty.util.ConcurrentArrayQueue) IOException(java.io.IOException)

Example 7 with CodeIndexDocument

use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.

the class CodeSearcherTest method testGetProjectStats.

// Indexes a single ten-line "Java" document owned by "/" and verifies the statistics
// that CodeSearcher reports for the repository it belongs to.
public void testGetProjectStats() throws IOException {
    String repoName = "testGetRepoDocuments";
    Singleton.getCodeIndexer().indexDocument(new CodeIndexDocument("/", repoName, "/", "/", "/", "md5hash", "Java", 10, "", "/", "/"));

    ProjectStats stats = new CodeSearcher().getProjectStats(repoName);

    // Totals for the single indexed file
    assertThat(stats.getTotalFiles()).isEqualTo(1);
    assertThat(stats.getTotalCodeLines()).isEqualTo(10);

    // Language facet
    assertThat(stats.getCodeFacetLanguages().get(0).getLanguageName()).isEqualTo("Java");
    assertThat(stats.getCodeFacetLanguages().get(0).getCount()).isEqualTo(1);

    // Owner facet
    assertThat(stats.getRepoFacetOwner().get(0).getOwner()).isEqualTo("/");
    assertThat(stats.getRepoFacetOwner().get(0).getCount()).isEqualTo(1);

    assertThat(stats.getCodeByLines().size()).isEqualTo(1);
}
Also used : ProjectStats(com.searchcode.app.dto.ProjectStats) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument)

Example 8 with CodeIndexDocument

use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.

the class CodeIndexerTest method testBuildDocument.

// Builds a Lucene document from a CodeIndexDocument and checks the number of fields,
// that spaces in the repo name, language name and code owner come back as underscores,
// and the exact string stored in the contents field.
public void testBuildDocument() {
    CodeIndexer indexer = new CodeIndexer();
    CodeIndexDocument source = new CodeIndexDocument("repoLocationRepoNameLocationFilename", "repo Name", "fileName", "fileLocation", "fileLocationFilename", "md5hash", "language Name", 10, "contents", "repoRemoteLocation", "code Owner");
    Document built = indexer.buildDocument(source);

    assertThat(built.getFields().size()).isEqualTo(16);

    assertThat(built.getFields(Values.REPONAME)[0].stringValue()).isEqualTo("repo_Name");
    assertThat(built.getFields(Values.LANGUAGENAME)[0].stringValue()).isEqualTo("language_Name");
    assertThat(built.getFields(Values.CODEOWNER)[0].stringValue()).isEqualTo("code_Owner");

    // Verifies that we ran through the pipeline
    IndexableField[] contentFields = built.getFields(Values.CONTENTS);
    assertThat(contentFields[0].stringValue()).isEqualTo(" filename filename filename filename filename filename  file name filelocationfilename filelocation contents contents contents contents contents contents");
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Document(org.apache.lucene.document.Document) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument)

Example 9 with CodeIndexDocument

use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.

the class IndexBaseRepoJob method indexDocsByDelta.

/**
 * Indexes all the documents in the repository changed file, effectively performing a delta update.
 * Should only be called when there is a genuine update, i.e. something was indexed previously and
 * has a new commit.
 *
 * Each changed file is checked (readable, not minified, not empty, not binary) and, when it
 * passes, is either indexed immediately (LOWMEMORY) or added to the shared index queue.
 * A file that fails a check is skipped; the remaining changed files are still processed.
 * Afterwards every deleted file is removed from the index.
 *
 * @param path               root path used to compute each file's project-relative location
 * @param repoName           name of the repository being updated
 * @param repoLocations      base directory holding the checked-out repositories
 * @param repoRemoteLocation remote location recorded on each indexed document
 * @param repositoryChanged  supplies the changed and deleted file lists for this update
 */
public void indexDocsByDelta(Path path, String repoName, String repoLocations, String repoRemoteLocation, RepositoryChanged repositoryChanged) {
    // Should have data object by this point
    SearchcodeLib scl = Singleton.getSearchCodeLib();
    Queue<CodeIndexDocument> codeIndexDocumentQueue = Singleton.getCodeIndexQueue();
    String fileRepoLocations = FilenameUtils.separatorsToUnix(repoLocations);
    // Used to hold the reports of what was indexed
    List<String[]> reportList = new ArrayList<>();
    for (String changedFile : repositoryChanged.getChangedFiles()) {
        // Abort the whole job when asked to pause/terminate or when the repo is flagged for deletion
        if (this.shouldJobPauseOrTerminate()) {
            return;
        }
        if (Singleton.getDataService().getPersistentDelete().contains(repoName)) {
            return;
        }
        String[] split = changedFile.split("/");
        String fileName = split[split.length - 1];
        changedFile = fileRepoLocations + "/" + repoName + "/" + changedFile;
        changedFile = changedFile.replace("//", "/");
        // A file that fails any of the checks below is skipped on its own. Using
        // `continue` (not `break`) keeps one bad file from silently dropping every
        // changed file that follows it in the delta.
        CodeLinesReturn codeLinesReturn = this.getCodeLines(changedFile, reportList);
        if (codeLinesReturn.isError()) {
            continue;
        }
        IsMinifiedReturn isMinified = this.getIsMinified(codeLinesReturn.getCodeLines(), fileName, reportList);
        if (isMinified.isMinified()) {
            continue;
        }
        if (this.checkIfEmpty(codeLinesReturn.getCodeLines(), changedFile, reportList)) {
            continue;
        }
        if (this.determineBinary(changedFile, fileName, codeLinesReturn.getCodeLines(), reportList)) {
            continue;
        }
        String md5Hash = this.getFileMd5(changedFile);
        String languageName = Singleton.getFileClassifier().languageGuesser(changedFile, codeLinesReturn.getCodeLines());
        String fileLocation = this.getRelativeToProjectPath(path.toString(), changedFile);
        String fileLocationFilename = changedFile.replace(fileRepoLocations, Values.EMPTYSTRING);
        String repoLocationRepoNameLocationFilename = changedFile;
        String newString = this.getBlameFilePath(fileLocationFilename);
        String codeOwner = this.getCodeOwner(codeLinesReturn.getCodeLines(), newString, repoName, fileRepoLocations, scl);
        // Built once and shared by both indexing paths below
        CodeIndexDocument codeIndexDocument = new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLinesReturn.getCodeLines().size(), StringUtils.join(codeLinesReturn.getCodeLines(), " "), repoRemoteLocation, codeOwner);
        if (this.LOWMEMORY) {
            // Index straight away rather than going through the shared queue
            try {
                Singleton.getCodeIndexer().indexDocument(codeIndexDocument);
            } catch (IOException ex) {
                Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage());
            }
        } else {
            this.sharedService.incrementCodeIndexLinesCount(codeLinesReturn.getCodeLines().size());
            codeIndexDocumentQueue.add(codeIndexDocument);
        }
        if (this.LOGINDEXED) {
            reportList.add(new String[] { changedFile, "included", "" });
        }
    }
    if (this.LOGINDEXED && !reportList.isEmpty()) {
        this.logIndexed(repoName + "_delta", reportList);
    }
    for (String deletedFile : repositoryChanged.getDeletedFiles()) {
        deletedFile = fileRepoLocations + "/" + repoName + "/" + deletedFile;
        deletedFile = deletedFile.replace("//", "/");
        Singleton.getLogger().info("Missing from disk, removing from index " + deletedFile);
        try {
            // The index entry is looked up by the SHA-1 of the full on-disk path
            Singleton.getCodeIndexer().deleteByCodeId(DigestUtils.sha1Hex(deletedFile));
        } catch (IOException ex) {
            Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + " indexDocsByDelta deleteByFileLocationFilename for " + repoName + " " + deletedFile + "\n with message: " + ex.getMessage());
        }
    }
}
Also used : SearchcodeLib(com.searchcode.app.util.SearchcodeLib) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) IOException(java.io.IOException)

Example 10 with CodeIndexDocument

use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.

the class CodeIndexer method indexDocuments.

/**
 * Given a queue of documents to index, index them by polling the queue, handling at most
 * INDEX_QUEUE_BATCH_SIZE documents per call.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 * TODO investigate how Lucene deals with multiple writes
 *
 * The directories and writers are opened with try-with-resources, so they are released
 * even when opening a later resource or indexing a document fails.
 *
 * @param codeIndexDocumentQueue queue to drain; documents left over after the batch limit stay queued
 * @throws IOException if opening, writing to, or closing the index or facet directories fails
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new CodeAnalyzer());
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is identical for every document, so build it once
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

    // Resources close in reverse declaration order: index writer, then taxonomy writer,
    // then the two directories. A failure while opening any one of them still closes
    // the ones already opened.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
         Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
         TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
         IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig)) {
        int count = 0;
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
            Document doc = this.buildDocument(codeIndexDocument);
            // Keyed on the path term, so re-indexing a file replaces its previous entry
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc));
            count++;
            // Stop once the batch limit is reached; otherwise fetch the next queued document
            codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
    }
}
Also used : DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) FacetsConfig(org.apache.lucene.facet.FacetsConfig) IndexWriter(org.apache.lucene.index.IndexWriter) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Term(org.apache.lucene.index.Term) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

CodeIndexDocument (com.searchcode.app.dto.CodeIndexDocument)10 IOException (java.io.IOException)3 CodeAnalyzer (com.searchcode.app.util.CodeAnalyzer)2 Date (java.util.Date)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 FacetsConfig (org.apache.lucene.facet.FacetsConfig)2 SortedSetDocValuesFacetField (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField)2 TaxonomyWriter (org.apache.lucene.facet.taxonomy.TaxonomyWriter)2 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 Term (org.apache.lucene.index.Term)2 Directory (org.apache.lucene.store.Directory)2 FSDirectory (org.apache.lucene.store.FSDirectory)2 ProjectStats (com.searchcode.app.dto.ProjectStats)1 GitService (com.searchcode.app.service.GitService)1 SearchcodeLib (com.searchcode.app.util.SearchcodeLib)1 SimpleDateFormat (java.text.SimpleDateFormat)1 Queue (java.util.Queue)1 Document (org.apache.lucene.document.Document)1