| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- # -*- coding: utf-8 -*-
- import json
- import os.path
- import re
- import sys
- from functools import partial
- from PySide6.QtWidgets import QMainWindow, QApplication, QMessageBox, QFileDialog, QWidget, QLineEdit, QPushButton, \
- QTextEdit, QTextBrowser
- from src.DataFilter import Ui_Form
# Placeholder tokens that the tool buttons append to a key box.
CHARACTER_FILTER_STR = "[字母]"
DIGIT_FILTER_STR = "[数字]"
CATEGORY_FILTER_STR = "[类别]"

# Compiled matchers that find each placeholder literally inside the key text.
# They are derived from the tokens above so the two can never drift apart.
category_pattern = re.compile(re.escape(CATEGORY_FILTER_STR))
digit_pattern = re.compile(re.escape(DIGIT_FILTER_STR))
english_pattern = re.compile(re.escape(CHARACTER_FILTER_STR))

# JSON config file and the key under which the last selected file path is stored.
CONFIG_FILE_PATH = "./config.json"
CONFIG_ITEM_LAST_SELECT_FILE_PATH = "lastSelectFilePath"
class MyMainForm(QWidget, Ui_Form):
    """Main window of the keyword filtering tool.

    The form has four filter stages. The first stage reads its input lines
    from the selected file; every later stage reads the result box of the
    stage before it.
    """

    def __init__(self, parent=None):
        """Build the UI, connect the signals and restore the saved file path."""
        super(MyMainForm, self).__init__(parent)
        self.setupUi(self)
        self.bind()
        self.loadConfig()

    def bind(self):
        """Build the button lookup tables and connect every button signal.

        ``toolDict`` maps a placeholder button's object name to the key box it
        appends to. ``resultDict`` maps a filter button's object name to
        ``(key box, category box, result box, parent result box, result label)``.
        """
        # One row per stage: (key box, category box, result box, result label,
        # category button, digit button, letter button, filter button).
        stages = (
            (self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, self.result_label_1,
             self.firstCategoryBtn, self.firstDigitBtn, self.firstCharacterBtn, self.firstFilterBtn),
            (self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.result_label_2,
             self.secondCategoryBtn, self.secondDigitBtn, self.secondCharacterBtn, self.secondFilterBtn),
            (self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.result_label_3,
             self.threeCategoryBtn, self.threeDigitBtn, self.threeCharacterBtn, self.threeFilterBtn),
            (self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.result_label_4,
             self.fourCategoryBtn, self.fourDigitBtn, self.fourCharacterBtn, self.fourFilterBtn),
        )

        self.toolDict = {}
        self.resultDict = {}
        self.fileBtn.clicked.connect(self.selectFile)

        # The first stage has no parent; each later stage reads the previous result box.
        parent_result_box = None
        for (key_box, category_box, result_box, result_label,
             category_btn, digit_btn, character_btn, filter_btn) in stages:
            placeholder_buttons = (
                (category_btn, CATEGORY_FILTER_STR),
                (digit_btn, DIGIT_FILTER_STR),
                (character_btn, CHARACTER_FILTER_STR),
            )
            for button, placeholder in placeholder_buttons:
                self.toolDict[button.objectName()] = key_box
                # partial binds the button and text now, so there is no late-binding issue.
                button.clicked.connect(partial(self.add_filter_str, button, placeholder))

            self.resultDict[filter_btn.objectName()] = (
                key_box, category_box, result_box, parent_result_box, result_label,
            )
            filter_btn.clicked.connect(partial(self.submit, filter_btn))
            parent_result_box = result_box

    def loadConfig(self):
        """Restore the last selected file path from the config file, if possible.

        A missing, unreadable or malformed config file is ignored so that a
        damaged config cannot stop the window from being constructed.
        """
        if not os.path.isfile(CONFIG_FILE_PATH):
            return
        try:
            with open(CONFIG_FILE_PATH, 'r', encoding='utf-8') as f:
                config = json.loads(f.read())
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return
        if not isinstance(config, dict):
            return
        last_path = config.get(CONFIG_ITEM_LAST_SELECT_FILE_PATH)
        if isinstance(last_path, str):
            self.filePathBox.setText(last_path)

    def selectFile(self):
        """Ask the user for the source file and remember the choice in the config file."""
        file_path, _selected_filter = QFileDialog.getOpenFileName(self, "选择文件")
        if not file_path:
            # Dialog was cancelled: keep the current path and the saved config.
            return
        # Update the UI first so the selection is usable even if saving fails.
        self.filePathBox.setText(file_path)
        with open(CONFIG_FILE_PATH, 'w', encoding='utf-8') as f:
            f.write(json.dumps({CONFIG_ITEM_LAST_SELECT_FILE_PATH: file_path}))

    def add_filter_str(self, btn_widget: QPushButton, filter_text):
        """Append ``filter_text`` to the key box registered for ``btn_widget``."""
        key_box = self.toolDict[btn_widget.objectName()]
        key_box.setText(key_box.text() + filter_text)

    def submit(self, filter_btn: QPushButton):
        """Run the stage that belongs to ``filter_btn`` and display its result.

        Nothing is changed when validation fails. An invalid regular
        expression or an unreadable source file is reported in a warning box
        instead of escaping from the slot.
        """
        key_box, category_box, result_box, parent_result_box, result_label = \
            self.resultDict[filter_btn.objectName()]
        if not self.check(key_box, category_box, parent_result_box):
            return
        try:
            before_filter_cnt, after_filter_cnt, filter_result_arr = self.deal(
                key_box, category_box, parent_result_box)
        except (OSError, UnicodeDecodeError, re.error) as err:
            QMessageBox.warning(self, "提示", "筛选失败:%s" % err)
            return
        result_label.setText("提取结果:原始数据%s条,筛选后%s条" % (before_filter_cnt, after_filter_cnt))
        result_box.setText("\n".join(filter_result_arr))

    def check(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
        """Validate the inputs of one stage, warning the user about the first problem.

        Args:
            key_box: Line edit holding the key text (may contain one placeholder kind).
            category_box: Text edit holding one category keyword per line.
            parent_result_box: Result box of the previous stage, or ``None``
                for the first stage, which reads the selected file instead.

        Returns:
            ``True`` when filtering can proceed, ``False`` otherwise.
        """
        key_text = key_box.text()
        if not key_text:
            QMessageBox.warning(self, "输入提示", "请输入待筛选关键词")
            return False

        placeholder_patterns = (category_pattern, digit_pattern, english_pattern)
        used_kinds = sum(1 for pattern in placeholder_patterns if pattern.search(key_text))
        if used_kinds > 1:
            QMessageBox.warning(self, "提示", "一次只能使用一种正则筛选项")
            return False

        # Blank category lines are skipped by deal(), so require a non-empty one here.
        has_category_key = any(category_box.toPlainText().splitlines())
        if category_pattern.search(key_text) and not has_category_key:
            QMessageBox.warning(self, "提示", "使用类别筛选,请输入待筛选的类别关键词")
            return False

        if parent_result_box is None:
            if not self.filePathBox.text():
                QMessageBox.warning(self, "提示", "请选择待筛选文件")
                return False
        elif not parent_result_box.toPlainText():
            QMessageBox.warning(self, "提示", "上级结果中没有数据")
            return False
        return True

    def deal(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
        """Filter the input lines of one stage.

        Args:
            key_box: Line edit holding the key text.
            category_box: Text edit holding one category keyword per line.
            parent_result_box: Result box of the previous stage, or ``None``
                to read the lines from the file named in ``filePathBox``.

        Returns:
            ``(input line count, result count, result list)``. For a category
            filter the result is de-duplicated and kept in input order.

        Raises:
            OSError, UnicodeDecodeError: The source file cannot be read.
            re.error: The key text or a category keyword is not a valid pattern.
        """
        key_text = key_box.text()
        if parent_result_box is None:
            # utf-8-sig also reads plain UTF-8 and drops a leading BOM, so the
            # BOM cannot end up inside the first key.
            with open(self.filePathBox.text(), 'r', encoding='utf-8-sig') as f:
                parent_key_arr = [line.rstrip("\n") for line in f]
        else:
            parent_key_arr = parent_result_box.toPlainText().split("\n")

        if category_pattern.search(key_text) is not None:
            matched = set()
            for category_key in category_box.toPlainText().splitlines():
                if not category_key:
                    # filter() treats an empty class as "no filter", which
                    # would let every key through; skip blank lines instead.
                    continue
                matched.update(self.filter(parent_key_arr, key_text, "类别", category_key))
            # De-duplicate while keeping the order of the input lines.
            filter_result_arr = list(dict.fromkeys(
                key for key in parent_key_arr if key in matched))
        elif digit_pattern.search(key_text) is not None:
            filter_result_arr = self.filter(parent_key_arr, key_text, "数字", "0-9")
        elif english_pattern.search(key_text) is not None:
            filter_result_arr = self.filter(parent_key_arr, key_text, "字母", "A-Za-z")
        else:
            filter_result_arr = self.filter(parent_key_arr, key_text)
        return len(parent_key_arr), len(filter_result_arr), filter_result_arr

    def filter(self, originArray, inputText, oldStr=None, newStr=None):
        """Return the entries of ``originArray`` that match the key text.

        Args:
            originArray: Iterable of strings to test.
            inputText: Key text, compiled as a regular expression after the
                placeholder ``[oldStr]`` (if any) has been removed from it.
            oldStr: Placeholder name without brackets, e.g. ``"数字"``.
            newStr: Body of a regex character class, e.g. ``"0-9"``. When
                given, an entry must also contain a character of that class.

        Returns:
            A list of the matching entries in their original order.

        Raises:
            re.error: ``inputText`` or ``newStr`` does not form a valid pattern.
        """
        # NOTE(review): the placeholder's position inside inputText is ignored;
        # the class may match anywhere in the entry. Confirm this is intended.
        key_source = inputText.replace("[{}]".format(oldStr), "") if oldStr else inputText
        key_pattern = re.compile(key_source)
        filter_pattern = re.compile("[{}]".format(newStr)) if newStr else None
        return [
            origin_key for origin_key in originArray
            if key_pattern.search(origin_key)
            and (filter_pattern is None or filter_pattern.search(origin_key))
        ]
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main form and
    # hand the event loop's return code back to the interpreter.
    application = QApplication(sys.argv)
    main_window = MyMainForm()
    main_window.show()
    exit_code = application.exec()
    sys.exit(exit_code)
|