# -*- coding:utf-8 -*-
"""Build a keyword -> byte-offset index for the keyword file.

Each line of ``config.KEY_FILE`` is expected to start with a keyword
identifier followed by a comma.  The script records, for every identifier,
the byte offset at which its line starts, and hands the resulting dict to
``tools.save_obj`` under ``config.KEY_INDEX_CACHE``.
"""
import config
import tools
import mmap
import logging
import os

TITLE = "关键词索引"


def _build_key_index(fmmap, encoding, on_progress=None):
    """Scan a memory-mapped file and map each line's key to its offset.

    Args:
        fmmap: An ``mmap.mmap`` object positioned at the start of the data
            to scan.
        encoding: Name of the codec used to decode each line.
        on_progress: Optional callable invoked after every line as
            ``on_progress(total_size, line_start_offset)``.

    Returns:
        A dict mapping the text before the first comma of each line to the
        byte offset where that line starts.  When a key occurs more than
        once, the offset of its last occurrence wins.

    Raises:
        UnicodeDecodeError: If a line cannot be decoded with ``encoding``.
    """
    key_index = {}
    total_size = fmmap.size()
    while True:
        # Remember where the line starts before readline() advances the cursor.
        line_start = fmmap.tell()
        raw_line = fmmap.readline()
        # readline() returns b"" only at end of file.
        if not raw_line:
            break
        key, separator, _ = raw_line.decode(encoding).partition(",")
        if separator:
            key_index[key] = line_start
        else:
            # A line without a comma (e.g. a blank trailing line) has no key;
            # skip it instead of aborting the whole build with ValueError.
            logging.warning(
                "Skipping line without ',' at byte offset %d", line_start
            )
        if on_progress is not None:
            on_progress(total_size, line_start)
    return key_index


def main():
    """Index ``config.KEY_FILE`` by key and persist the index.

    Reads the file through a read-only memory map, builds the
    key -> byte-offset dict and passes it to ``tools.save_obj`` together
    with ``config.KEY_INDEX_CACHE``.  Start and end messages are emitted via
    ``tools.log_start_msg`` / ``tools.log_end_msg``.
    """
    # Initialise logging configuration.
    tools.init_log()
    tools.log_start_msg(TITLE)

    # Binary mode: the handle is only needed for its file descriptor, and
    # decoding is done per line with config.ENCODING_CHARSET.
    with open(config.KEY_FILE, "rb") as fkey:
        if os.fstat(fkey.fileno()).st_size == 0:
            # mmap refuses to map an empty file; an empty file simply has
            # no keys to index.
            logging.warning("Key file %s is empty", config.KEY_FILE)
            key_index_cache = {}
        else:
            with mmap.mmap(
                fkey.fileno(), 0, access=mmap.ACCESS_READ
            ) as fmmap:
                key_index_cache = _build_key_index(
                    fmmap, config.ENCODING_CHARSET, tools.tip_in_size
                )

    # Persist the index.
    tools.save_obj(config.KEY_INDEX_CACHE, key_index_cache)
    tools.log_end_msg(TITLE)


if __name__ == '__main__':
    main()

    # Manual check (kept disabled): reload the saved index and log the line
    # that each stored offset points to.
    # key_index_cache = tools.load_obj(config.KEY_INDEX_CACHE)
    # with open(config.KEY_FILE, "r", encoding=config.ENCODING_CHARSET) as fkey, \
    #         mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
    #     for key, value in key_index_cache.items():
    #         fmmap.seek(value)
    #         line = fmmap.readline().decode(config.ENCODING_CHARSET)
    #         logging.debug("key: %s, value: %d, content: %s" % (key, value, line))