Mercurial > wikked
changeset 31:e3c05dccc6dd
The indexer is now opening files in UTF-8.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 06 Jan 2013 20:17:08 -0800 |
parents | 420ff74c2e28 |
children | 7bb67ac28c3c |
files | wikked/indexer.py |
diffstat | 1 files changed, 3 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/wikked/indexer.py Sun Jan 06 20:16:19 2013 -0800
+++ b/wikked/indexer.py Sun Jan 06 20:17:08 2013 -0800
@@ -1,5 +1,6 @@
 import os
 import os.path
+import codecs
 import logging
 from whoosh.index import create_in, open_dir
 from whoosh.fields import Schema, ID, KEYWORD, TEXT, STORED
@@ -91,8 +92,8 @@
                     'url': hit['url']
                     }
             page_info['title_highlights'] = hit.highlights('title')
-            with open(hit['path']) as f:
-                content = unicode(f.read())
+            with codecs.open(hit['path'], 'r', encoding='utf-8') as f:
+                content = f.read()
             page_info['content_highlights'] = hit.highlights('content', text=content)
             page_infos.append(page_info)
         return page_infos