Changeset - c59c4d4323e7
[Not reviewed]
default
0 1 0
Marcin Kuzminski - 15 years ago 2010-09-01 12:13:03
marcin@python-works.com
added support for broken symlinks in whoosh indexer
1 file changed with 11 insertions and 0 deletions:
0 comments (0 inline, 0 general)
pylons_app/lib/indexers/daemon.py
Show inline comments
 
@@ -66,54 +66,65 @@ class WhooshIndexingDaemon(object):
 
                    index_paths_.add(jn(path, f))
 
    
 
        return index_paths_
 
    
 
    def add_doc(self, writer, path, repo):
        """Add a single file to the index through ``writer``.

        Files whose extension is listed in ``INDEX_EXTENSIONS`` are indexed
        together with their content; every other file is indexed with empty
        content, so only its metadata is stored.

        Paths that do not exist, or that are broken symlinks, are logged and
        skipped instead of aborting the whole indexing run.

        :param writer: index writer; must provide ``add_document()``
        :param path: filesystem path of the file to index
        :param repo: repository the file belongs to; its ``contact`` and
            ``name`` attributes are stored with the document
        :raises OSError: when ``path`` cannot be stat'ed for any reason
            other than ``ENOENT``
        """
        import errno

        # Stat the file before anything else touches it: a broken symlink
        # must be detected *before* open() is attempted, otherwise files
        # with an indexed extension would still fail while being read.
        try:
            modtime = os.path.getmtime(path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                # bare raise keeps the original traceback intact
                raise
            log.debug('path %s does not exist or is a broken symlink' % path)
            return

        ext = unicode(path.split('/')[-1].split('.')[-1].lower())

        #we just index the content of chosen files
        if ext in INDEX_EXTENSIONS:
            log.debug('    >> %s [WITH CONTENT]' % path)
            # the context manager closes the file even if read() fails
            with open(path, 'rb') as fobj:
                content = fobj.read()
            try:
                u_content = unicode(content)
            except UnicodeDecodeError:
                #in case of a decode error just represent the content as
                #an escaped byte string
                u_content = unicode(str(content).encode('string_escape'))
        else:
            log.debug('    >> %s' % path)
            #just index the file name without its content
            u_content = u''

        writer.add_document(owner=unicode(repo.contact),
                            repository=u"%s" % repo.name,
                            path=u"%s" % path,
                            content=u_content,
                            modtime=modtime,
                            extension=ext)
 

	
 
    
 
    def build_index(self):
        """Rebuild the index from scratch.

        Any index directory already present at ``IDX_LOCATION`` is removed
        and recreated, then every path returned by ``get_paths()`` for each
        repository found under ``self.repo_location`` is passed to
        ``add_doc()``. All documents are committed in a single
        ``writer.commit(merge=True)`` call at the end.
        """
        # start from a clean slate: drop the old index directory, if any
        if os.path.exists(IDX_LOCATION):
            log.debug('removing previos index')
            rmtree(IDX_LOCATION)

        if not os.path.exists(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)

        index = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
        writer = index.writer()

        repositories = scan_paths(self.repo_location).values()
        for repo in repositories:
            log.debug('building index @ %s' % repo.path)

            for doc_path in self.get_paths(repo.path):
                self.add_doc(writer, doc_path, repo)

        # one commit for the whole run, after all repositories were walked
        writer.commit(merge=True)

        log.debug('>>> FINISHED BUILDING INDEX <<<')
 
            
 
    
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
0 comments (0 inline, 0 general)