# HG changeset patch # User Ludovic Chabant # Date 1357532228 28800 # Node ID e3c05dccc6ddb743fcad9a95f3a6844bbc2fd879 # Parent 420ff74c2e287068d86ee5a4482c8486e00fe2e1 The indexer is now opening files in UTF-8. diff -r 420ff74c2e28 -r e3c05dccc6dd wikked/indexer.py --- a/wikked/indexer.py Sun Jan 06 20:16:19 2013 -0800 +++ b/wikked/indexer.py Sun Jan 06 20:17:08 2013 -0800 @@ -1,5 +1,6 @@ import os import os.path +import codecs import logging from whoosh.index import create_in, open_dir from whoosh.fields import Schema, ID, KEYWORD, TEXT, STORED @@ -91,8 +92,8 @@ 'url': hit['url'] } page_info['title_highlights'] = hit.highlights('title') - with open(hit['path']) as f: - content = unicode(f.read()) + with codecs.open(hit['path'], 'r', encoding='utf-8') as f: + content = f.read() page_info['content_highlights'] = hit.highlights('content', text=content) page_infos.append(page_info) return page_infos