Changeset - b3c8a3a5ce5f
[Not reviewed]
beta
0 1 0
Marcin Kuzminski - 13 years ago 2012-11-21 23:01:37
marcin@python-works.com
fixed issues some people had with whoosh indexers and unicode decode
errors when building os paths
1 file changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/indexers/daemon.py
Show inline comments
 
@@ -20,49 +20,49 @@
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
from __future__ import with_statement
 

	
 
import os
 
import sys
 
import logging
 
import traceback
 

	
 
from shutil import rmtree
 
from time import mktime
 

	
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the rhodecode import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 
from rhodecode.config.conf import INDEX_EXTENSIONS
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.lib.utils2 import safe_unicode
 
from rhodecode.lib.utils2 import safe_unicode, safe_str
 
from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, \
 
    CHGSET_IDX_NAME
 

	
 
from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
 
    NodeDoesNotExistError
 

	
 
from whoosh.index import create_in, open_dir, exists_in
 
from whoosh.query import *
 
from whoosh.qparser import QueryParser
 

	
 
log = logging.getLogger('whoosh_indexer')
 

	
 

	
 
class WhooshIndexingDaemon(object):
 
    """
 
    Daemon for atomic indexing jobs
 
    """
 

	
 
    def __init__(self, indexname=IDX_NAME, index_location=None,
 
                 repo_location=None, sa=None, repo_list=None,
 
                 repo_update_list=None):
 
        self.indexname = indexname
 

	
 
        self.index_location = index_location
 
@@ -98,49 +98,49 @@ class WhooshIndexingDaemon(object):
 
        self.initial = True
 
        if not os.path.isdir(self.index_location):
 
            os.makedirs(self.index_location)
 
            log.info('Cannot run incremental index since it does not'
 
                     ' yet exist running full build')
 
        elif not exists_in(self.index_location, IDX_NAME):
 
            log.info('Running full index build as the file content'
 
                     ' index does not exist')
 
        elif not exists_in(self.index_location, CHGSET_IDX_NAME):
 
            log.info('Running full index build as the changeset'
 
                     ' index does not exist')
 
        else:
 
            self.initial = False
 

	
 
    def get_paths(self, repo):
 
        """
 
        recursive walk in root dir and return a set of all path in that dir
 
        based on repository walk function
 
        """
 
        index_paths_ = set()
 
        try:
 
            tip = repo.get_changeset('tip')
 
            for _topnode, _dirs, files in tip.walk('/'):
 
                for f in files:
 
                    index_paths_.add(jn(repo.path, f.path))
 
                    index_paths_.add(jn(safe_str(repo.path), safe_str(f.path)))
 

	
 
        except RepositoryError:
 
            log.debug(traceback.format_exc())
 
            pass
 
        return index_paths_
 

	
 
    def get_node(self, repo, path):
 
        n_path = path[len(repo.path) + 1:]
 
        node = repo.get_changeset().get_node(n_path)
 
        return node
 

	
 
    def get_node_mtime(self, node):
 
        return mktime(node.last_changeset.date.timetuple())
 

	
 
    def add_doc(self, writer, path, repo, repo_name):
 
        """
 
        Adding doc to writer this function itself fetches data from
 
        the instance of vcs backend
 
        """
 

	
 
        node = self.get_node(repo, path)
 
        indexed = indexed_w_content = 0
 
        # we just index the content of chosen files, and skip binary files
 
        if node.extension in INDEX_EXTENSIONS and not node.is_binary:
0 comments (0 inline, 0 general)