key_index.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # -*- coding:utf-8 -*-
  2. import config
  3. import tools
  4. import mmap
  5. def main():
  6. # 关键词索引容器,
  7. key_index = []
  8. with open(config.KEY_FILE, "r", encoding=config.ENCODING_CHARSET) as fkey, \
  9. mmap.mmap(fkey.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
  10. # 总大小
  11. total_num = fmmap.size()
  12. while True:
  13. # 读取光标位置
  14. cur_pos = fmmap.tell()
  15. # 把光标移动到下一行
  16. line = fmmap.readline()
  17. # 如果没有数据则结束
  18. if not line :
  19. break
  20. # 建立关键词序号和位置的关系,以索引当行号(0基)
  21. key_index.append(cur_pos)
  22. # 进度显示
  23. tools.tip_in_size(total_num, cur_pos)
  24. with open("./data/tmp/key_index_test.csv", "w", encoding=config.ENCODING_CHARSET) as f:
  25. f.write(",".join([str(i) for i in key_index]))
  26. # 保存索引
  27. # tools.save_obj(config.KEY_INDEX_CACHE, key_index)
  28. if __name__ == '__main__':
  29. TITLE = "关键词索引"
  30. # 日志配置初始化
  31. tools.init_log()
  32. tools.log_start_msg(TITLE)
  33. main()
  34. tools.log_end_msg(TITLE)