Changeset - 2b7a0e28c4dc
[Not reviewed]
default
0 2 0
Takumi IINO - 10 years ago 2015-10-16 17:12:50
trot.thunder@gmail.com
indexers: introduce list of filenames for indexing

Before this patch, we could not search files without a file extension (like
Makefile) or dotfiles (like .hgtags).

This patch makes it possible to search for these files by introducing a list
of filenames for indexing. The list is currently empty.
2 files changed with 15 insertions and 7 deletions:
0 comments (0 inline, 0 general)
kallithea/config/conf.py
Show inline comments
 
@@ -29,18 +29,20 @@ from kallithea.lib.utils2 import __get_l
 

	
 

	
 
# language map is also used by whoosh indexer, which for those specified
 
# extensions will index its content
 
LANGUAGES_EXTENSIONS_MAP = __get_lem()
 

	
 
#==============================================================================
 
# WHOOSH INDEX EXTENSIONS
 
#==============================================================================
 
# EXTENSIONS WE WANT TO INDEX CONTENT OFF USING WHOOSH
 
# Whoosh index targets
 

	
 
# Extensions we want to index content of using whoosh
 
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 

	
 
# Filenames we want to index content of using whoosh
 
INDEX_FILENAMES = []
 

	
 
# list of readme files to search in file tree and display in summary
 
# attached weights define the search order; lower is first
 
ALL_READMES = [
 
    ('readme', 0), ('README', 0), ('Readme', 0),
 
    ('doc/readme', 1), ('doc/README', 1), ('doc/Readme', 1),
 
    ('Docs/readme', 2), ('Docs/README', 2), ('Docs/Readme', 2),
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -38,13 +38,13 @@ from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
# Add location of top level folder to sys.path
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 
from kallithea.config.conf import INDEX_EXTENSIONS
 
from kallithea.config.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
 
from kallithea.model.scm import ScmModel
 
from kallithea.model.db import Repository
 
from kallithea.lib.utils2 import safe_unicode, safe_str
 
from kallithea.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, \
 
    CHGSET_IDX_NAME
 

	
 
@@ -159,25 +159,31 @@ class WhooshIndexingDaemon(object):
 
        assert path[len(repo_path)] in (os.path.sep, os.path.altsep)
 
        node_path = path[len(repo_path) + 1:]
 
        cs = self._get_index_changeset(repo, index_rev=index_rev)
 
        node = cs.get_node(node_path)
 
        return node
 

	
 
    def is_indexable_node(self, node):
        """
        Tell whether the content of ``node`` should be indexed.

        A node qualifies when its extension is listed in INDEX_EXTENSIONS
        or its name is listed in INDEX_FILENAMES, and it is not binary.
        """
        # Check the name-based criteria first so that ``is_binary`` is only
        # consulted for nodes that are index candidates.
        selected = (node.extension in INDEX_EXTENSIONS
                    or node.name in INDEX_FILENAMES)
        if not selected:
            return False
        return not node.is_binary
 

	
 
    def get_node_mtime(self, node):
        """
        Return the modification time of ``node``: the date of its last
        changeset, converted from its time tuple with ``mktime``.
        """
        changed_on = node.last_changeset.date
        return mktime(changed_on.timetuple())
 

	
 
    def add_doc(self, writer, path, repo, repo_name, index_rev=None):
 
        """
 
        Adding doc to writer this function itself fetches data from
 
        the instance of vcs backend
 
        """
 

	
 
        node = self.get_node(repo, path, index_rev)
 
        indexed = indexed_w_content = 0
 
        # we just index the content of chosen files, and skip binary files
 
        if node.extension in INDEX_EXTENSIONS and not node.is_binary:
 
        if self.is_indexable_node(node):
 
            u_content = node.content
 
            if not isinstance(u_content, unicode):
 
                log.warning('  >> %s Could not get this content as unicode '
 
                            'replacing with empty content' % path)
 
                u_content = u''
 
            else:
0 comments (0 inline, 0 general)