key_index.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # -*- coding:utf-8 -*-
  2. import config
  3. import tools
  4. import mmap
  5. import logging
  6. TITLE = "关键词索引"
  7. def main():
  8. # 日志配置初始化
  9. tools.init_log()
  10. tools.log_start_msg(TITLE)
  11. # 关键词索引容器
  12. key_index_cache = {}
  13. with open(config.KEY_FILE, "r", encoding=config.ENCODING_CHARSET) as fkey, \
  14. mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
  15. # 总大小
  16. total_num = fmmap.size()
  17. while True:
  18. # 读取光标位置
  19. cur_pos = fmmap.tell()
  20. # 把光标移动到下一行
  21. line = fmmap.readline().decode(config.ENCODING_CHARSET)
  22. # 如果没有数据则结束
  23. if not line :
  24. break
  25. # 获取关键词序号
  26. index = line.index(",")
  27. # 建立关键词序号和位置的关系
  28. key_index_cache[line[:index]]=cur_pos
  29. # 进度显示
  30. tools.tip_in_size(total_num, cur_pos)
  31. # 保存索引
  32. tools.save_obj(config.KEY_INDEX_CACHE, key_index_cache)
  33. tools.log_end_msg(TITLE)
  34. if __name__ == '__main__':
  35. main()
  36. # key_index_cache = tools.load_obj(config.KEY_INDEX_CACHE)
  37. # with open(config.KEY_FILE, "r", encoding=config.ENCODING_CHARSET) as fkey, \
  38. # mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
  39. # for key,value in key_index_cache.items():
  40. # fmmap.seek(value)
  41. # line = fmmap.readline().decode(config.ENCODING_CHARSET)
  42. # logging.debug("key: %s, value: %d, 内容:%s" % (key, value, line))