Browse Source

fix:修复返回值错误

ChenYL 1 year ago
parent
commit
7aa3d09409
1 changed file with 13 additions and 1 deletion
  1. 13 1
      src/agg.py

+ 13 - 1
src/agg.py

@@ -148,6 +148,17 @@ def merge_word(data_path: str, txt_files: list):
     # 长尾词集合容器
     word_set = set()
 
+    # 读取数据目录下是否还有其他长尾词文件
+    files = os.listdir(data_path)
+    if files:
+        for file in files:
+            if file.endswith(WORD_FILE_SUFFIX):
+                txt_files.append(os.path.join(data_path, file))
+
+    # 如果没有待处理的文件列表,返回
+    if not txt_files:
+        return False
+
     # 读取数据并排重
     for i, file in enumerate(txt_files):
         with open(file, "r", encoding="utf-8") as f:
@@ -182,7 +193,7 @@ def word_split_and_reverse_index(data_path: str):
 
     if total_line_num == 0:
         print("没有待处理的数据,文本量为0")
-        return True
+        return False
 
     # 分割任务数量
     task_list = utils.avg_split_task(total_line_num, math.ceil(total_line_num / os.cpu_count()))
@@ -324,6 +335,7 @@ def sort_file_content(data_path: str):
                 if not tmp_result:
                     continue
                 else:
+                    tmp_result = sorted(tmp_result, key=lambda x: len(x), reverse=True)
                     result.append((count, tmp_result))
                     tmp_result = []
                     count = 0