| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- # -*- coding:utf-8 -*-
- import sys
- from time import time
- import os
- import config
- import tools
- import ast
- import re
- import stop_word
- import mmap
# Task name passed to the start/end log messages.
TITLE = "关键词倒排索引"


def _scan_key_offsets(fmmap, total_num):
    """Map each record's key to the byte offset at which the record starts.

    A record is one line of the mapping; its key is the text before the
    first comma. Blank (whitespace-only) lines are skipped instead of
    aborting the scan.

    Args:
        fmmap: Read-only ``mmap`` positioned at offset 0.
        total_num: Total size of the mapping in bytes, forwarded to the
            progress helper.

    Returns:
        dict mapping key -> byte offset of the line's first byte. If a key
        occurs on several lines, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line contains no comma.
    """
    key_offsets = {}
    offset = 0
    # mmap.readline() returns b"" once the end of the mapping is reached.
    for raw_line in iter(fmmap.readline, b""):
        line = raw_line.decode(config.ENCODING_CHARSET)
        if line.strip():
            key_offsets[line[:line.index(",")]] = offset
        # Progress display (reported with the offset of the line just read).
        tools.tip_in_size(total_num, offset)
        # Offsets are counted in bytes, not characters, so they can be used
        # directly as positions in the file.
        offset += len(raw_line)
    return key_offsets


def main():
    """Build the key -> byte-offset index of KEY_REVERSE_FILE and save it.

    Scans ``config.KEY_REVERSE_FILE`` line by line through a read-only memory
    map, records where each key's line begins, and hands the resulting dict
    to ``tools.save_obj`` under ``config.KEY_REVERSE_INDEX_CACHE``. An empty
    input file yields an empty index.
    """
    # Logging setup.
    tools.init_log()
    tools.log_start_msg(TITLE)

    # Container for the key -> offset index.
    key_reverse_index_cache = {}

    # Only the file descriptor is used (by mmap), so binary mode is enough.
    with open(config.KEY_REVERSE_FILE, "rb") as freverse:
        total_num = os.fstat(freverse.fileno()).st_size
        # A zero-length file cannot be memory-mapped; leave the index empty.
        if total_num:
            with mmap.mmap(freverse.fileno(), 0,
                           access=mmap.ACCESS_READ) as fmmap:
                key_reverse_index_cache = _scan_key_offsets(fmmap, total_num)

    # Save the index.
    tools.save_obj(config.KEY_REVERSE_INDEX_CACHE, key_reverse_index_cache)
    tools.log_end_msg(TITLE)
if __name__ == "__main__":
    # The index build is currently switched off; re-enable the call below to
    # regenerate the saved index before previewing it.
    # main()

    # Preview: print the first 11 entries yielded by the saved index.
    saved_index = tools.load_obj(config.KEY_REVERSE_INDEX_CACHE)
    for _, entry in zip(range(11), saved_index):
        print(entry)
|