# -*- coding:utf-8 -*-
"""Ad-hoc statistics over the key reverse index.

Two experiments live here:

* ``cost_statistics`` -- buckets the per-key counts read from the reverse
  statistics file and benchmarks ``ast.literal_eval`` on lists of various
  sizes.
* ``memory_statistics`` -- builds an in-memory cache from the head of the
  reverse index file so the memory footprint can be observed.
"""
from collections import namedtuple
from dataclasses import make_dataclass
import ast
import logging
import mmap
import os
import random
import time

import config
import tools

TASK_TITLE = "数据统计分析"

# (label, inclusive lower bound, exclusive upper bound or None for "no limit").
# Counts are integers, so "== 1" is [1, 2) and "> 1 and < 100" is [2, 100).
_COUNT_BUCKETS = (
    ("等于1", 1, 2),
    ("大于1小于100", 2, 100),
    ("大于等于100小于200", 100, 200),
    ("大于等于200小于300", 200, 300),
    ("大于等于300小于400", 300, 400),
    ("大于等于400小于500", 400, 500),
    ("大于等于500小于1000", 500, 1000),
    ("大于等于1000小于5000", 1000, 5000),
    ("大于等于5000小于1万", 5000, 10000),
    ("大于等于1万小于5万", 10000, 50000),
    ("大于等于5万小于10万", 50000, 100000),
    ("大于等于10万", 100000, None),
)

# Population size and sample sizes used by the literal_eval benchmark.
_SAMPLE_POPULATION = 14500029
_SAMPLE_SIZES = (
    1, 10, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 50000, 100000,
    595528, 689520, 776035, 822266, 951491,
)


def _split_record(line):
    """Split ``line`` at its first comma into ``(key, rest)``.

    Raises:
        ValueError: if the line contains no comma.
    """
    key, sep, rest = line.partition(",")
    if not sep:
        raise ValueError("malformed record (no ',' separator): %r" % line)
    return key, rest


def transfer_str(num):
    """Format ``num`` for log output, using the 万 (10,000) unit when large.

    Values below 10000 are returned via ``str``; larger values are rendered
    as ``"<num // 10000>万<num % 10000>"``.
    """
    # NOTE(review): the remainder is not zero-padded, so 10005 renders as
    # "1万5"; left unchanged because the log format may be relied upon.
    if num >= 10000:
        wan, rest = divmod(num, 10000)
        return "%d万%d" % (wan, rest)
    return str(num)


def cal(list):
    """Return ``(length, total, average)`` of ``list`` as display strings.

    The average is truncated to an integer.  An empty list yields
    ``("0", "0", "0")`` instead of raising ``ZeroDivisionError``.
    """
    # The parameter name shadows the builtin but is kept for compatibility
    # with existing callers.
    list_len = len(list)
    list_count = sum(list)
    average = int(list_count / list_len) if list_len else 0
    return (
        transfer_str(list_len),
        transfer_str(list_count),
        transfer_str(average),
    )


def tip(condition, list):
    """Log the summary produced by ``cal`` for the bucket named ``condition``."""
    logging.info(
        "条件:%s - 涉及:%s个词根,涉及词数:%s,平均约:%s 词数/词根",
        condition, *cal(list)
    )


def test_tip(list, ele_num):
    """Time ``ast.literal_eval`` on the repr of ``ele_num`` random elements.

    Raises:
        ValueError: from ``random.sample`` if ``ele_num`` exceeds ``len(list)``.
    """
    # The timed region covers sampling, str() and literal_eval, as before.
    start = time.perf_counter()
    ast.literal_eval(str(random.sample(list, ele_num)))
    elapsed = time.perf_counter() - start
    logging.info(
        "%s个元素的字符列表转换成对象耗时%s", transfer_str(ele_num), elapsed
    )


def cost_statistics():
    """Log bucketed count statistics, then run the literal_eval benchmark.

    Each non-blank line of ``config.KEY_REVERSE_STATISTICS_FILE`` is parsed
    as ``<key>,<count>``, split at the first comma.
    """
    count_list = []
    with open(config.KEY_REVERSE_STATISTICS_FILE, "r",
              encoding=config.ENCODING_CHARSET) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank / trailing empty lines instead of crashing.
                continue
            _, count_text = _split_record(line)
            count_list.append(int(count_text))

    logging.info(
        "总祠根数:%d, 涉及的总分词查找数:%d", len(count_list), sum(count_list)
    )

    for label, low, high in _COUNT_BUCKETS:
        tip(label, [val for val in count_list
                    if val >= low and (high is None or val < high)])

    sample_list = [i for i in range(_SAMPLE_POPULATION)]
    for size in _SAMPLE_SIZES:
        test_tip(sample_list, size)


def memory_statistics():
    """Build the hot cache from the head of the reverse index file and save it.

    Records are read from the start of ``config.KEY_REVERSE_FILE`` up to and
    including the record that begins at the offset stored under the key
    ``"导不出"`` in the loaded index cache.  Each record is ``<key>,<literal>``;
    the literal is parsed with ``ast.literal_eval``.  The cache is then saved
    via ``tools.save_obj`` and the process sleeps for 20 seconds.
    """
    key_reverse_index_cache = tools.load_obj(config.KEY_REVERSE_INDEX_CACHE)
    end_pos = key_reverse_index_cache["导不出"]
    logging.info("查找结束位置")

    # Only the file descriptor is needed for mmap, so open in binary mode.
    with open(config.KEY_REVERSE_FILE, "rb") as freverse, \
            mmap.mmap(freverse.fileno(), 0, access=mmap.ACCESS_READ) as fmmap:
        logging.info("开始构建缓存")
        cache = {}
        start = time.perf_counter()
        while fmmap.tell() <= end_pos:
            raw = fmmap.readline()
            if not raw:
                # End of file reached before passing end_pos.
                break
            # NOTE(review): decoding is hard-coded to UTF-8 rather than
            # config.ENCODING_CHARSET -- confirm the two agree.
            line = raw.decode("UTF-8")
            if not line.strip():
                continue
            key, word_root = _split_record(line)
            cache[key] = ast.literal_eval(word_root)
        elapsed = time.perf_counter() - start
        logging.info(
            '构建热点缓存完成,耗时:%s,缓存数量:%d', elapsed, len(cache)
        )

    logging.info('把缓存保存到本地')
    tools.save_obj(config.KEY_REVERSE_INDEX_HOT_CACHE, cache)
    logging.info('保存结束')
    # Announce the observation window before sleeping, not after it.
    logging.info('留20s进行内存观察')
    time.sleep(20)


def main():
    """Initialise logging and run the memory statistics task."""
    tools.init_log()
    tools.log_start_msg(TASK_TITLE)
    memory_statistics()
    tools.log_end_msg(TASK_TITLE)


if __name__ == "__main__":
    # NOTE(review): main() is not invoked here; the script currently only
    # runs this small namedtuple experiment.
    Shape = namedtuple('Shape', ['x', 'y', 'z'])
    exm = Shape(1, 2, 3)
    print(exm.index(2))