ChenYL 1 жил өмнө
parent
commit
2d1ccec4f4

+ 18 - 1
src/agg.py

@@ -9,6 +9,7 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 import jieba
 
 import utils
+from src.constant import GLOBAL_PROJECT_RESOURCE_DIR
 
 # 文件后缀:_长尾词.txt
 WORD_FILE_SUFFIX = "_长尾词.txt"
@@ -31,9 +32,15 @@ WORD_AGG_RESULT_TEMP_FILE = "长尾词_聚合结果_临时.txt"
 # 文件:长尾词_聚合结果.txt
 WORD_AGG_RESULT_FILE = "长尾词_聚合结果.txt"
 
+# 文件:长尾词_基本信息.txt
+WORD_BASIC_INFO_FILE = "长尾词_基本信息.txt"
+
 # 文件夹:历史聚合数据归档文件夹
 WORD_AGG_DIR = "长尾词聚合分析_%s"
 
+# Java聚合jar包路径
+JAVA_AGG_JAR_PATH = os.path.join(GLOBAL_PROJECT_RESOURCE_DIR, "money-mining-1.0-jar-with-dependencies.jar")
+
 jieba.setLogLevel(jieba.logging.INFO)
 
 
@@ -48,6 +55,11 @@ def agg_word(path: str):
         print("输入的目标路径不存在! " + path)
         return
 
+    # 判断Jar包是否存在
+    if not os.path.exists(JAVA_AGG_JAR_PATH):
+        print("Java聚合jar包路径不存在!" + JAVA_AGG_JAR_PATH)
+        return
+
     # 目标路径分析
     zip_files, txt_files = [], []
     if os.path.isdir(path):
@@ -237,6 +249,11 @@ def word_split_and_reverse_index(data_path: str):
             for key, values in word_reverse_index_dict.items():
                 fo.write("%s,%s\n" % (key, ",".join(values)))
 
+        # 生成基本信息
+        with open(os.path.join(data_path, WORD_BASIC_INFO_FILE), "w", encoding="UTF-8") as fo:
+            fo.write("长尾词总数:%s\n" % total_line_num)
+            fo.write("倒排索引总数:%s\n" % len(word_reverse_index_dict))
+
         # 关闭进程池
         process_pool.shutdown()
 
@@ -309,7 +326,7 @@ def agg_process(data_path: str):
     :param data_path: 分析结果文件夹路径
     :return: True-运行正常 False-运行失败
     """
-    cmds = ["java", "-jar", "./resources/money-mining-1.0-jar-with-dependencies.jar", "agg", data_path]
+    cmds = ["java", "-jar", JAVA_AGG_JAR_PATH, "agg", data_path]
     return_code = subprocess.run(cmds).returncode
     return 0 == return_code
 

+ 0 - 13
src/config.ini

@@ -1,13 +0,0 @@
-; 基本配置
-[path]
-; 临时文件路径
-tmpPath = ../tmp
-; 配置文件名
-aggConfig = ../tmp/aggConfig.json
-
-[java]
-jar = ./resources/money-mining-1.0-jar-with-dependencies.jar
-
-
-
-

+ 9 - 0
src/constant.py

@@ -0,0 +1,9 @@
+# -*- coding:utf-8 -*-
+# Project-wide directory globals.
+# All of them are placeholders (None) here; the entry scripts assign the real
+# paths at start-up, before importing modules that read these values at import time.
+# Project root directory
+GLOBAL_PROJECT_DIR = None
+# Project source directory
+GLOBAL_PROJECT_SRC_DIR = None
+# Project resource directory
+GLOBAL_PROJECT_RESOURCE_DIR = None
+# Project temporary-file directory
+GLOBAL_PROJECT_TEMP_DIR = None

+ 46 - 0
src/main.py

@@ -0,0 +1,46 @@
+# -*- coding:utf-8 -*-
+import os
+import sys
+from pathlib import Path
+
+# Put the project root on the import path. The root is the parent of the
+# directory holding this file; take it structurally rather than slicing a
+# fixed number of characters off the path string.
+project_dir = str(Path(__file__).resolve().parent.parent)
+sys.path.append(project_dir)
+# Publish the project directories as globals.
+# NOTE: this must happen before importing any module that reads these
+# values at import time, which is why the import sits here and not at the top.
+from src import constant
+# Project root directory
+constant.GLOBAL_PROJECT_DIR = project_dir
+# Project source directory
+constant.GLOBAL_PROJECT_SRC_DIR = os.path.join(project_dir, "src")
+# Project resource directory
+constant.GLOBAL_PROJECT_RESOURCE_DIR = os.path.join(project_dir, "src", "resources")
+# Project temporary-file directory
+constant.GLOBAL_PROJECT_TEMP_DIR = os.path.join(project_dir, "tmp")
+
+
+from agg import agg_word
+
+
+def main(args: list):
+    """
+    Command-line entry point: run the sub-program named by ``args[1]``.
+
+    :param args: full argument vector in ``sys.argv`` form (index 0 is the script)
+    :return: None
+    """
+    if len(args) == 1:
+        print("请输入待运行的程序名")
+        return
+
+    program = args[1]
+    if "agg" != program:
+        print("输入了不知名程序名:%s" % program)
+        return
+
+    # "agg" needs exactly one extra argument: the target path.
+    if len(args) != 3:
+        print("运行长尾词聚合程序,请先输入目标路径")
+        return
+    agg_word(args[2])
+
+
+if __name__ == "__main__":
+    main(sys.argv)

+ 27 - 5
src/MiningUI.py → src/mainw.py

@@ -1,10 +1,26 @@
 # -*- coding:utf-8 -*-
+import os.path
 import sys
-
-sys.path.append('E:\\ChenYL\\CodeRepository\\money-mining-python')
-
-from PySide6.QtWidgets import QApplication, QWidget, QVBoxLayout, QTabWidget, QLabel
-
+from pathlib import Path
+
+# Put the project root on the import path. The root is the parent of the
+# directory holding this file; take it structurally rather than slicing a
+# fixed number of characters off the path string.
+project_dir = str(Path(__file__).resolve().parent.parent)
+sys.path.append(project_dir)
+# Publish the project directories as globals.
+# NOTE: this must happen before importing any module that reads these
+# values at import time, which is why the import sits here and not at the top.
+from src import constant
+# Project root directory
+constant.GLOBAL_PROJECT_DIR = project_dir
+# Project source directory
+constant.GLOBAL_PROJECT_SRC_DIR = os.path.join(project_dir, "src")
+# Project resource directory
+constant.GLOBAL_PROJECT_RESOURCE_DIR = os.path.join(project_dir, "src", "resources")
+# Project temporary-file directory
+constant.GLOBAL_PROJECT_TEMP_DIR = os.path.join(project_dir, "tmp")
+
+from PySide6.QtWidgets import QApplication, QWidget, QVBoxLayout, QTabWidget
+
+from src.ui.AggProcesModealAndView import AggProcessModelAndView
 from src.ui.AggAnalyseModeAndView import AggAnalyseModeAndView
 from src.ui.TemplateFilteringModelAndView import TemplateFilteringModelAndView
 
@@ -13,6 +29,11 @@ class MyWindow(QWidget):
     def __init__(self):
         super().__init__()
 
+        self.aggProcessTab = QWidget()
+        self.aggProcessTabLayout = QVBoxLayout()
+        self.aggProcessTabLayout.addWidget(AggProcessModelAndView())
+        self.aggProcessTab.setLayout(self.aggProcessTabLayout)
+
         self.aggAnalyseTab = QWidget()
         self.aggAnalyseTabLayout = QVBoxLayout()
         self.aggAnalyseTabLayout.addWidget(AggAnalyseModeAndView())
@@ -24,6 +45,7 @@ class MyWindow(QWidget):
         self.templateFilteringTab.setLayout(self.templateFilteringTabLayout)
 
         self.tab = QTabWidget()
+        self.tab.addTab(self.aggProcessTab, "聚合处理")
         self.tab.addTab(self.aggAnalyseTab, "聚合分析")
         self.tab.addTab(self.templateFilteringTab, "模板筛选")
 

+ 0 - 28
src/mining.py

@@ -1,28 +0,0 @@
-# -*- coding:utf-8 -*-
-import sys
-
-from agg import agg_word
-
-
-def main(args: list):
-    """
-    程序入口
-    :param args: 命令参数
-    :return:
-    """
-    if len(args) == 1:
-        print("请输入待运行的程序名")
-        return
-
-    func = args[1]
-    if "agg" == func:
-        if len(args) == 3:
-            agg_word(args[2])
-        else:
-            print("运行长尾词聚合程序,请先输入目标路径")
-    else:
-        print("输入了不知名程序名:%s" % func)
-
-
-if __name__ == "__main__":
-    main(sys.argv)

+ 8 - 13
src/ui/AggAnalyseModeAndView.py

@@ -1,14 +1,16 @@
 # -*- coding: utf-8 -*-
-import configparser
 import os.path
 
-from PySide6.QtCore import Qt, QEvent
-from PySide6.QtGui import QFont, QKeyEvent
+from PySide6.QtCore import Qt
+from PySide6.QtGui import QFont
 from PySide6.QtWidgets import QWidget, QFileDialog, QMessageBox
 
-from src import utils
+from src import utils, constant
 from src.ui.AggAnalyseView import Ui_Form
 
+# 配置文件路径
+CONFIG_PATH = os.path.join(constant.GLOBAL_PROJECT_TEMP_DIR, "aggAnalyseConf.json")
+
 
 class AggAnalyseModeAndView(QWidget, Ui_Form):
     def __init__(self, parent=None):
@@ -22,15 +24,8 @@ class AggAnalyseModeAndView(QWidget, Ui_Form):
         font.setPointSize(16)
         self.contentList.setFont(font)
 
-        # 获取当前脚本所在的目录
-        current_path = os.path.dirname(os.path.abspath(__file__))
-        # 获取当前脚本所在的项目根目录
-        root_path = os.path.dirname(current_path)
-        conf = configparser.ConfigParser()
-        conf.read(os.path.join(root_path, "config.ini"), encoding="UTF-8")
         # 获取历史使用记录
-        self.configPath = conf['path']['aggConfig']
-        self.config = utils.load_json(self.configPath)
+        self.config = utils.load_json(CONFIG_PATH)
         if self.config:
             self.targetFilePath.setText(self.config['targetFilePath'])
             self.load()
@@ -106,7 +101,7 @@ class AggAnalyseModeAndView(QWidget, Ui_Form):
         self.msg.setText("总数量:%d,当前位置:%d,数量:%d" % (
             self.totalNum, currentIndex, self.contentDict[currentIndex]["count"]))
         # 保存历史使用记录
-        utils.saveJson(self.configPath, self.config)
+        utils.saveJson(CONFIG_PATH, self.config)
 
     def keyPressEvent(self, event):
         if event.key() == Qt.Key.Key_Left:

+ 33 - 0
src/ui/AggProcesModealAndView.py

@@ -0,0 +1,33 @@
+import os
+
+from PySide6.QtWidgets import QWidget, QFileDialog, QMessageBox
+
+from src import constant
+from src.ui.AggProcess import Ui_Form
+
+
+class AggProcessModelAndView(QWidget, Ui_Form):
+    """Widget for choosing a file or folder and launching the aggregation program on it."""
+
+    def __init__(self, parent=None):
+        super(AggProcessModelAndView, self).__init__(parent)
+        self.setupUi(self)
+
+        self.selectFileBtn.clicked.connect(self.selectFile)
+        self.selectDirBtn.clicked.connect(self.selectDir)
+        self.startBtn.clicked.connect(self.process)
+
+    def selectFile(self):
+        """Ask the user for a file and show its path in the path box."""
+        file_path, file_type = QFileDialog.getOpenFileName(self, "选择文件")
+        # A cancelled dialog yields an empty path; keep the previous selection then.
+        if file_path:
+            self.dataPath.setText(file_path)
+
+    def selectDir(self):
+        """Ask the user for a folder and show its path in the path box."""
+        file_path = QFileDialog.getExistingDirectory(self, "选择文件夹")
+        # A cancelled dialog yields an empty path; keep the previous selection then.
+        if file_path:
+            self.dataPath.setText(file_path)
+
+    def process(self):
+        """Run ``main.py agg <selected path>`` in a new console window."""
+        dataPath = self.dataPath.text()
+        if not dataPath:
+            QMessageBox.warning(self, "提示", "请先选择待聚合的文件/文件夹")
+            return
+        # Launch the aggregation program
+        process_file = os.path.join(constant.GLOBAL_PROJECT_SRC_DIR, "main.py")
+        # Both paths are wrapped in double quotes so that a space inside either
+        # one does not split it into several arguments. cmd.exe strips only the
+        # outermost pair of quotes after /K, leaving the inner ones intact.
+        os.system('start cmd.exe /K "chcp 65001 && conda activate money-mining && python "%s" agg "%s""' % (process_file, dataPath))

+ 68 - 0
src/ui/AggProcess.py

@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+
+################################################################################
+## Form generated from reading UI file 'AggProcess.ui'
+##
+## Created by: Qt User Interface Compiler version 6.5.1
+##
+## WARNING! All changes made in this file will be lost when recompiling UI file!
+################################################################################
+
+from PySide6.QtCore import (QCoreApplication, QDate, QDateTime, QLocale,
+    QMetaObject, QObject, QPoint, QRect,
+    QSize, QTime, QUrl, Qt)
+from PySide6.QtGui import (QBrush, QColor, QConicalGradient, QCursor,
+    QFont, QFontDatabase, QGradient, QIcon,
+    QImage, QKeySequence, QLinearGradient, QPainter,
+    QPalette, QPixmap, QRadialGradient, QTransform)
+from PySide6.QtWidgets import (QApplication, QGridLayout, QLayout, QLineEdit,
+    QPushButton, QSizePolicy, QVBoxLayout, QWidget)
+
+class Ui_Form(object):
+    """Generated UI definition for the aggregation-process form.
+
+    Builds a vertical layout holding one grid: a path line edit and two
+    selection buttons on the first row, and a start button on the second row.
+    """
+
+    def setupUi(self, Form):
+        """Create the widgets and layouts on ``Form`` and set their texts.
+
+        :param Form: the widget that receives the layout and child widgets
+        """
+        if not Form.objectName():
+            Form.setObjectName(u"Form")
+        Form.resize(1507, 873)
+        self.verticalLayout = QVBoxLayout(Form)
+        self.verticalLayout.setObjectName(u"verticalLayout")
+        self.gridLayout = QGridLayout()
+        self.gridLayout.setObjectName(u"gridLayout")
+        self.gridLayout.setSizeConstraint(QLayout.SetMaximumSize)
+        self.selectDirBtn = QPushButton(Form)
+        self.selectDirBtn.setObjectName(u"selectDirBtn")
+
+        # Row 0, column 1
+        self.gridLayout.addWidget(self.selectDirBtn, 0, 1, 1, 1)
+
+        self.startBtn = QPushButton(Form)
+        self.startBtn.setObjectName(u"startBtn")
+
+        # Row 1, spanning all three columns
+        self.gridLayout.addWidget(self.startBtn, 1, 0, 1, 3)
+
+        self.dataPath = QLineEdit(Form)
+        self.dataPath.setObjectName(u"dataPath")
+        # The line edit is created disabled
+        self.dataPath.setEnabled(False)
+
+        # Row 0, column 0
+        self.gridLayout.addWidget(self.dataPath, 0, 0, 1, 1)
+
+        self.selectFileBtn = QPushButton(Form)
+        self.selectFileBtn.setObjectName(u"selectFileBtn")
+
+        # Row 0, column 2
+        self.gridLayout.addWidget(self.selectFileBtn, 0, 2, 1, 1)
+
+
+        self.verticalLayout.addLayout(self.gridLayout)
+
+
+        self.retranslateUi(Form)
+
+        QMetaObject.connectSlotsByName(Form)
+    # setupUi
+
+    def retranslateUi(self, Form):
+        """Set the window title and the user-visible texts of all widgets.
+
+        :param Form: the widget whose window title is set
+        """
+        Form.setWindowTitle(QCoreApplication.translate("Form", u"Form", None))
+        # Button text: "Select folder"
+        self.selectDirBtn.setText(QCoreApplication.translate("Form", u"\u9009\u62e9\u6587\u4ef6\u5939", None))
+        # Button text: "Start analysis program"
+        self.startBtn.setText(QCoreApplication.translate("Form", u"\u542f\u52a8\u5206\u6790\u7a0b\u5e8f", None))
+        # Placeholder text: "Path of the data directory to analyse"
+        self.dataPath.setPlaceholderText(QCoreApplication.translate("Form", u"\u5f85\u5206\u6790\u6570\u636e\u76ee\u5f55\u8def\u5f84", None))
+        # Button text: "Select file"
+        self.selectFileBtn.setText(QCoreApplication.translate("Form", u"\u9009\u62e9\u6587\u4ef6", None))
+    # retranslateUi
+

+ 59 - 0
src/ui/AggProcess.ui

@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>1507</width>
+    <height>873</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <item>
+    <layout class="QGridLayout" name="gridLayout">
+     <property name="sizeConstraint">
+      <enum>QLayout::SetMaximumSize</enum>
+     </property>
+     <item row="0" column="1">
+      <widget class="QPushButton" name="selectDirBtn">
+       <property name="text">
+        <string>选择文件夹</string>
+       </property>
+      </widget>
+     </item>
+     <item row="1" column="0" colspan="3">
+      <widget class="QPushButton" name="startBtn">
+       <property name="text">
+        <string>启动分析程序</string>
+       </property>
+      </widget>
+     </item>
+     <item row="0" column="0">
+      <widget class="QLineEdit" name="dataPath">
+       <property name="enabled">
+        <bool>false</bool>
+       </property>
+       <property name="placeholderText">
+        <string>待分析数据目录路径</string>
+       </property>
+      </widget>
+     </item>
+     <item row="0" column="2">
+      <widget class="QPushButton" name="selectFileBtn">
+       <property name="text">
+        <string>选择文件</string>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>

+ 13 - 8
src/ui/TemplateFilteringModelAndView.py

@@ -7,6 +7,7 @@ from functools import partial
 from PySide6.QtWidgets import QMessageBox, QFileDialog, QWidget, QLineEdit, QPushButton, \
     QTextEdit, QTextBrowser
 
+from src import constant
 from src.ui.TemplateFilteringView import Ui_Form
 
 category_pattern = re.compile(r'\[类别\]')
@@ -17,7 +18,9 @@ CHARACTER_FILTER_STR = "[字母]"
 DIGIT_FILTER_STR = "[数字]"
 CATEGORY_FILTER_STR = "[类别]"
 
-CONFIG_FILE_PATH = "../tmp/config.json"
+# 配置文件路径
+CONFIG_PATH = os.path.join(constant.GLOBAL_PROJECT_TEMP_DIR, "templateFilteringConf.json")
+# 配置文件属性
 CONFIG_ITEM_LAST_SELECT_FILE_PATH = "lastSelectFilePath"
 
 
@@ -46,13 +49,15 @@ class TemplateFilteringModelAndView(QWidget, Ui_Form):
         }
         self.resultDict = {
             self.firstFilterBtn.objectName(): (
-            self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, None, self.result_label_1),
+                self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, None, self.result_label_1),
             self.secondFilterBtn.objectName(): (
-            self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.firstResultBox, self.result_label_2),
+                self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.firstResultBox,
+                self.result_label_2),
             self.threeFilterBtn.objectName(): (
-            self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.secondResultBox, self.result_label_3),
+                self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.secondResultBox,
+                self.result_label_3),
             self.fourFilterBtn.objectName(): (
-            self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.threeResultBox, self.result_label_4)
+                self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.threeResultBox, self.result_label_4)
         }
 
         self.fileBtn.clicked.connect(self.selectFile)
@@ -82,14 +87,14 @@ class TemplateFilteringModelAndView(QWidget, Ui_Form):
         self.fourFilterBtn.clicked.connect(partial(self.submit, self.fourFilterBtn))
 
     def loadConfig(self):
-        if os.path.isfile(CONFIG_FILE_PATH):
-            with open(CONFIG_FILE_PATH, 'r', encoding='utf-8') as f:
+        if os.path.isfile(CONFIG_PATH):
+            with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
                 config = json.loads(f.read())
                 self.filePathBox.setText(config[CONFIG_ITEM_LAST_SELECT_FILE_PATH])
 
     def selectFile(self):
         file_path, file_type = QFileDialog.getOpenFileName(self, "选择文件")
-        with open(CONFIG_FILE_PATH, 'w', encoding='utf-8') as f:
+        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
             f.write(json.dumps({CONFIG_ITEM_LAST_SELECT_FILE_PATH: file_path}))
         self.filePathBox.setText(file_path)
 

+ 10 - 7
src/utils.py

@@ -4,11 +4,10 @@ import math
 import os
 import pickle
 
-# 停用词存放文件夹
-STOP_WORD_DIR = "./resources/stopwords"
+from src import constant
 
-# 临时文件路径
-TEMP_PATH = "../tmp"
+# 停用词存放文件夹
+STOP_WORD_DIR = os.path.join(constant.GLOBAL_PROJECT_RESOURCE_DIR, "stopwords")
 
 # 停用词模型
 STOP_WORD_CACHE = "stop_word.pkl"
@@ -38,11 +37,11 @@ def load_stop_word():
     加载停用词
     """
     # 判断临时文件路径是否存在,不存在则重新创建
-    if not os.path.exists(TEMP_PATH):
-        os.makedirs(TEMP_PATH)
+    if not os.path.exists(constant.GLOBAL_PROJECT_TEMP_DIR):
+        os.makedirs(constant.GLOBAL_PROJECT_TEMP_DIR)
 
     # 判断是否存在缓存
-    stop_word_cache_path = os.path.join(TEMP_PATH, STOP_WORD_CACHE)
+    stop_word_cache_path = os.path.join(constant.GLOBAL_PROJECT_TEMP_DIR, STOP_WORD_CACHE)
     if os.path.exists(stop_word_cache_path) and os.path.isfile(stop_word_cache_path):
         return load_obj(stop_word_cache_path)
 
@@ -115,6 +114,10 @@ def saveJson(save_path: str, save_obj: dict):
     :param save_obj: 保存的内容对象
     :return:
     """
+    # 判断临时文件路径是否存在,不存在则重新创建
+    if not os.path.exists(constant.GLOBAL_PROJECT_TEMP_DIR):
+        os.makedirs(constant.GLOBAL_PROJECT_TEMP_DIR)
+
     with open(save_path, 'w', encoding='utf-8') as f:
         f.write(json.dumps(save_obj))
 

+ 2 - 0
start.bat

@@ -0,0 +1,2 @@
+@echo off
+conda activate money-mining && start pythonw ./src/mainw.py