# -*- coding:utf-8 -*-
- import config
- import tools
- import mmap
- import logging
- TITLE = "关键词索引"
def main():
    """Build and persist the keyword-number -> byte-offset index.

    Scans ``config.KEY_FILE`` line by line through a read-only memory map.
    For every line, the text before the first comma is taken as the keyword
    number and mapped to the byte offset at which that line starts, so the
    line can later be fetched with a single ``seek`` + ``readline``.
    If the same keyword number occurs more than once, the last occurrence
    wins. The finished mapping is stored with ``tools.save_obj`` under
    ``config.KEY_INDEX_CACHE``.

    Lines that contain no comma (for example a trailing blank line) are
    skipped with a warning instead of aborting the whole run.

    Raises:
        ValueError: if the key file is empty (``mmap`` cannot map a
            zero-length file).
        UnicodeDecodeError: if a line cannot be decoded with
            ``config.ENCODING_CHARSET``.
    """
    # Initialise logging and announce the start of the job.
    tools.init_log()
    tools.log_start_msg(TITLE)

    # Keyword number (str) -> byte offset of the start of its line.
    key_index_cache = {}

    # Binary mode: the file object is only used for its descriptor; all
    # reading happens on the memory map, which yields bytes.
    with open(config.KEY_FILE, "rb") as fkey, \
            mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
        # Total size in bytes, used only for progress reporting.
        total_num = fmmap.size()

        while True:
            # Remember where this line starts before readline() advances
            # the cursor to the next line.
            cur_pos = fmmap.tell()
            line = fmmap.readline().decode(config.ENCODING_CHARSET)

            # An empty read means end of file.
            if not line:
                break

            # The keyword number is everything before the first comma.
            key, sep, _ = line.partition(",")
            if sep:
                key_index_cache[key] = cur_pos
            else:
                # Malformed / blank line: do not let it kill the whole run.
                logging.warning(
                    "skipping line at offset %d: no ',' separator", cur_pos
                )

            # Progress display.
            tools.tip_in_size(total_num, cur_pos)

    # Persist the index.
    tools.save_obj(config.KEY_INDEX_CACHE, key_index_cache)
    tools.log_end_msg(TITLE)
-
# Script entry point: build the keyword index when run directly.
if __name__ == '__main__':
    main()
    # Manual verification snippet (kept disabled): reloads the saved index
    # and logs, for every key, the line found at the recorded offset.
    # key_index_cache = tools.load_obj(config.KEY_INDEX_CACHE)
    # with open(config.KEY_FILE, "r", encoding=config.ENCODING_CHARSET) as fkey, \
    #         mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
    #     for key,value in key_index_cache.items():
    #         fmmap.seek(value)
    #         line = fmmap.readline().decode(config.ENCODING_CHARSET)
    #         logging.debug("key: %s, value: %d, 内容:%s" % (key, value, line))