# main.py
# -*- coding: utf-8 -*-
import json
import os.path
import re
import sys
from functools import partial
from typing import Optional

from PySide6.QtWidgets import (
    QApplication,
    QFileDialog,
    QLineEdit,
    QMainWindow,
    QMessageBox,
    QPushButton,
    QTextBrowser,
    QTextEdit,
    QWidget,
)

from src.DataFilter import Ui_Form
  10. category_pattern = re.compile(r'\[类别\]')
  11. digit_pattern = re.compile(r'\[数字\]')
  12. english_pattern = re.compile(r'\[字母\]')
  13. CHARACTER_FILTER_STR = "[字母]"
  14. DIGIT_FILTER_STR = "[数字]"
  15. CATEGORY_FILTER_STR = "[类别]"
  16. CONFIG_FILE_PATH = "./config.json"
  17. CONFIG_ITEM_LAST_SELECT_FILE_PATH = "lastSelectFilePath"
  18. class MyMainForm(QWidget, Ui_Form):
  19. def __init__(self, parent=None):
  20. super(MyMainForm, self).__init__(parent)
  21. self.setupUi(self)
  22. self.bind()
  23. self.loadConfig()
  24. def bind(self):
  25. self.toolDict = {
  26. self.firstDigitBtn.objectName(): self.firstKeyBox,
  27. self.firstCategoryBtn.objectName(): self.firstKeyBox,
  28. self.firstCharacterBtn.objectName(): self.firstKeyBox,
  29. self.secondDigitBtn.objectName(): self.secondKeyBox,
  30. self.secondCategoryBtn.objectName(): self.secondKeyBox,
  31. self.secondCharacterBtn.objectName(): self.secondKeyBox,
  32. self.threeDigitBtn.objectName(): self.threeKeyBox,
  33. self.threeCategoryBtn.objectName(): self.threeKeyBox,
  34. self.threeCharacterBtn.objectName(): self.threeKeyBox,
  35. self.fourDigitBtn.objectName(): self.fourKeyBox,
  36. self.fourCategoryBtn.objectName(): self.fourKeyBox,
  37. self.fourCharacterBtn.objectName(): self.fourKeyBox
  38. }
  39. self.resultDict = {
  40. self.firstFilterBtn.objectName(): (self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, None, self.result_label_1),
  41. self.secondFilterBtn.objectName(): (self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.firstResultBox, self.result_label_2),
  42. self.threeFilterBtn.objectName(): (self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.secondResultBox, self.result_label_3),
  43. self.fourFilterBtn.objectName(): (self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.threeResultBox, self.result_label_4)
  44. }
  45. self.fileBtn.clicked.connect(self.selectFile)
  46. self.firstCategoryBtn.clicked.connect(partial(self.add_filter_str, self.firstCategoryBtn, CATEGORY_FILTER_STR))
  47. self.firstDigitBtn.clicked.connect(partial(self.add_filter_str, self.firstDigitBtn, DIGIT_FILTER_STR))
  48. self.firstCharacterBtn.clicked.connect(partial(self.add_filter_str, self.firstCharacterBtn, CHARACTER_FILTER_STR))
  49. self.firstFilterBtn.clicked.connect(partial(self.submit, self.firstFilterBtn))
  50. self.secondCategoryBtn.clicked.connect(partial(self.add_filter_str, self.secondCategoryBtn, CATEGORY_FILTER_STR))
  51. self.secondDigitBtn.clicked.connect(partial(self.add_filter_str, self.secondDigitBtn, DIGIT_FILTER_STR))
  52. self.secondCharacterBtn.clicked.connect(partial(self.add_filter_str, self.secondCharacterBtn, CHARACTER_FILTER_STR))
  53. self.secondFilterBtn.clicked.connect(partial(self.submit, self.secondFilterBtn))
  54. self.threeCategoryBtn.clicked.connect(partial(self.add_filter_str, self.threeCategoryBtn, CATEGORY_FILTER_STR))
  55. self.threeDigitBtn.clicked.connect(partial(self.add_filter_str, self.threeDigitBtn, DIGIT_FILTER_STR))
  56. self.threeCharacterBtn.clicked.connect(partial(self.add_filter_str, self.threeCharacterBtn, CHARACTER_FILTER_STR))
  57. self.threeFilterBtn.clicked.connect(partial(self.submit, self.threeFilterBtn))
  58. self.fourCategoryBtn.clicked.connect(partial(self.add_filter_str, self.fourCategoryBtn, CATEGORY_FILTER_STR))
  59. self.fourDigitBtn.clicked.connect(partial(self.add_filter_str, self.fourDigitBtn, DIGIT_FILTER_STR))
  60. self.fourCharacterBtn.clicked.connect(partial(self.add_filter_str, self.fourCharacterBtn, CHARACTER_FILTER_STR))
  61. self.fourFilterBtn.clicked.connect(partial(self.submit, self.fourFilterBtn))
  62. def loadConfig(self):
  63. if os.path.isfile(CONFIG_FILE_PATH):
  64. with open(CONFIG_FILE_PATH, 'r', encoding='utf-8') as f:
  65. config = json.loads(f.read())
  66. self.filePathBox.setText(config[CONFIG_ITEM_LAST_SELECT_FILE_PATH])
  67. def selectFile(self):
  68. file_path, file_type = QFileDialog.getOpenFileName(self, "选择文件")
  69. with open(CONFIG_FILE_PATH, 'w', encoding='utf-8') as f:
  70. f.write(json.dumps({CONFIG_ITEM_LAST_SELECT_FILE_PATH: file_path}))
  71. self.filePathBox.setText(file_path)
  72. def add_filter_str(self, btn_widget: QPushButton, filter_text):
  73. key_box = self.toolDict[btn_widget.objectName()]
  74. key_box.setText(key_box.text() + filter_text)
  75. def submit(self, filter_btn: QPushButton):
  76. key_box, category_box, result_box, parent_result_box, result_label = self.resultDict[filter_btn.objectName()]
  77. if not self.check(key_box, category_box, parent_result_box):
  78. return
  79. before_filter_cnt, after_filter_cnt, filter_result_arr = self.deal(key_box, category_box, parent_result_box)
  80. result_label.setText("提取结果:原始数据%s条,筛选后%s条" % (before_filter_cnt, after_filter_cnt))
  81. result_box.setText("\n".join(filter_result_arr))
  82. def check(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
  83. key_text = key_box.text()
  84. if len(key_text) == 0:
  85. QMessageBox.warning(self, "输入提示", "请输入待筛选关键词")
  86. return False
  87. cnt = 0
  88. for pattern in [category_pattern, digit_pattern, english_pattern]:
  89. if pattern.search(key_text) is not None:
  90. cnt = cnt + 1
  91. if cnt > 1:
  92. QMessageBox.warning(self, "提示", "一次只能使用一种正则筛选项")
  93. return False
  94. category_text = category_box.toPlainText()
  95. if category_pattern.search(key_text) is not None and len(category_text) == 0:
  96. QMessageBox.warning(self, "提示", "使用类别筛选,请输入待筛选的类别关键词")
  97. return False
  98. if parent_result_box is None:
  99. file_path = self.filePathBox.text()
  100. if len(file_path) == 0:
  101. QMessageBox.warning(self, "提示", "请选择带筛选文件")
  102. return False
  103. elif len(parent_result_box.toPlainText()) == 0:
  104. QMessageBox.warning(self, "提示", "上级结果中没有数据")
  105. return False
  106. return True
  107. def deal(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
  108. key_text = key_box.text()
  109. parent_key_arr = None
  110. if parent_result_box is None:
  111. with open(self.filePathBox.text(), 'r', encoding='utf-8') as f:
  112. parent_key_arr = [content.replace("\n", "") for content in f.readlines()]
  113. else:
  114. parent_key_arr = parent_result_box.toPlainText().split("\n")
  115. filter_result_arr = None
  116. if category_pattern.search(key_text) is not None:
  117. filter_result_arr = set()
  118. categoryKeyArray = category_box.toPlainText().splitlines()
  119. for categoryKey in categoryKeyArray:
  120. filter_result_arr.update(self.filter(parent_key_arr, key_text, "类别", categoryKey))
  121. elif digit_pattern.search(key_text) is not None:
  122. filter_result_arr = self.filter(parent_key_arr, key_text, "数字", "0-9")
  123. elif english_pattern.search(key_text) is not None:
  124. filter_result_arr = self.filter(parent_key_arr, key_text, "字母", "A-Za-z")
  125. else:
  126. filter_result_arr = self.filter(parent_key_arr, key_text)
  127. return len(parent_key_arr), len(filter_result_arr), filter_result_arr
  128. def filter(self, originArray, inputText, oldStr=None, newStr=None):
  129. resultArray = []
  130. key_pattern = None
  131. filter_pattern = None
  132. if oldStr is not None and len(oldStr) > 0:
  133. key_pattern = re.compile(inputText.replace("[{}]".format(oldStr), ""))
  134. else:
  135. key_pattern = re.compile(inputText)
  136. if newStr is not None and len(newStr) > 0:
  137. filter_pattern = re.compile("[{}]".format(newStr))
  138. for originKey in originArray:
  139. if key_pattern.search(originKey) is not None:
  140. if filter_pattern is not None:
  141. if filter_pattern.search(originKey) is not None:
  142. resultArray.append(originKey)
  143. else:
  144. resultArray.append(originKey)
  145. return resultArray
  146. if __name__ == "__main__":
  147. app = QApplication(sys.argv)
  148. myWin = MyMainForm()
  149. myWin.show()
  150. sys.exit(app.exec())