|
@@ -1,4 +1,6 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
+import json
|
|
|
|
|
+import os.path
|
|
|
import re
|
|
import re
|
|
|
import sys
|
|
import sys
|
|
|
from functools import partial
|
|
from functools import partial
|
|
@@ -12,17 +14,20 @@ category_pattern = re.compile(r'\[类别\]')
|
|
|
digit_pattern = re.compile(r'\[数字\]')
|
|
digit_pattern = re.compile(r'\[数字\]')
|
|
|
english_pattern = re.compile(r'\[字母\]')
|
|
english_pattern = re.compile(r'\[字母\]')
|
|
|
|
|
|
|
|
|
|
+CHARACTER_FILTER_STR = "[字母]"
|
|
|
|
|
+DIGIT_FILTER_STR = "[数字]"
|
|
|
|
|
+CATEGORY_FILTER_STR = "[类别]"
|
|
|
|
|
|
|
|
-class MyMainForm(QWidget, Ui_Form):
|
|
|
|
|
|
|
+CONFIG_FILE_PATH = "./config.json"
|
|
|
|
|
+CONFIG_ITEM_LAST_SELECT_FILE_PATH = "lastSelectFilePath"
|
|
|
|
|
|
|
|
- CHARACTER_FILTER_STR = "[字母]"
|
|
|
|
|
- DIGIT_FILTER_STR = "[数字]"
|
|
|
|
|
- CATEGORY_FILTER_STR = "[类别]"
|
|
|
|
|
|
|
+class MyMainForm(QWidget, Ui_Form):
|
|
|
|
|
|
|
|
def __init__(self, parent=None):
|
|
def __init__(self, parent=None):
|
|
|
super(MyMainForm, self).__init__(parent)
|
|
super(MyMainForm, self).__init__(parent)
|
|
|
self.setupUi(self)
|
|
self.setupUi(self)
|
|
|
self.bind()
|
|
self.bind()
|
|
|
|
|
+ self.loadConfig()
|
|
|
|
|
|
|
|
def bind(self):
|
|
def bind(self):
|
|
|
self.toolDict = {
|
|
self.toolDict = {
|
|
@@ -40,37 +45,46 @@ class MyMainForm(QWidget, Ui_Form):
|
|
|
self.fourCharacterBtn.objectName(): self.fourKeyBox
|
|
self.fourCharacterBtn.objectName(): self.fourKeyBox
|
|
|
}
|
|
}
|
|
|
self.resultDict = {
|
|
self.resultDict = {
|
|
|
- self.firstFilterBtn.objectName(): (self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, None),
|
|
|
|
|
- self.secondFilterBtn.objectName(): (self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.firstResultBox),
|
|
|
|
|
- self.threeFilterBtn.objectName(): (self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.secondResultBox),
|
|
|
|
|
- self.fourFilterBtn.objectName(): (self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.threeResultBox)
|
|
|
|
|
|
|
+ self.firstFilterBtn.objectName(): (self.firstKeyBox, self.firstCategoryBox, self.firstResultBox, None, self.result_label_1),
|
|
|
|
|
+ self.secondFilterBtn.objectName(): (self.secondKeyBox, self.secondCategoryBox, self.secondResultBox, self.firstResultBox, self.result_label_2),
|
|
|
|
|
+ self.threeFilterBtn.objectName(): (self.threeKeyBox, self.threeCategoryBox, self.threeResultBox, self.secondResultBox, self.result_label_3),
|
|
|
|
|
+ self.fourFilterBtn.objectName(): (self.fourKeyBox, self.fourCategoryBox, self.fourResultBox, self.threeResultBox, self.result_label_4)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
self.fileBtn.clicked.connect(self.selectFile)
|
|
self.fileBtn.clicked.connect(self.selectFile)
|
|
|
|
|
|
|
|
- self.firstCategoryBtn.clicked.connect(partial(self.add_filter_str, self.firstCategoryBtn, self.CATEGORY_FILTER_STR))
|
|
|
|
|
- self.firstDigitBtn.clicked.connect(partial(self.add_filter_str, self.firstDigitBtn, self.DIGIT_FILTER_STR))
|
|
|
|
|
- self.firstCharacterBtn.clicked.connect(partial(self.add_filter_str, self.firstCharacterBtn, self.CHARACTER_FILTER_STR))
|
|
|
|
|
|
|
+ self.firstCategoryBtn.clicked.connect(partial(self.add_filter_str, self.firstCategoryBtn, CATEGORY_FILTER_STR))
|
|
|
|
|
+ self.firstDigitBtn.clicked.connect(partial(self.add_filter_str, self.firstDigitBtn, DIGIT_FILTER_STR))
|
|
|
|
|
+ self.firstCharacterBtn.clicked.connect(partial(self.add_filter_str, self.firstCharacterBtn, CHARACTER_FILTER_STR))
|
|
|
self.firstFilterBtn.clicked.connect(partial(self.submit, self.firstFilterBtn))
|
|
self.firstFilterBtn.clicked.connect(partial(self.submit, self.firstFilterBtn))
|
|
|
|
|
|
|
|
- self.secondCategoryBtn.clicked.connect(partial(self.add_filter_str, self.secondCategoryBtn, self.CATEGORY_FILTER_STR))
|
|
|
|
|
- self.secondDigitBtn.clicked.connect(partial(self.add_filter_str, self.secondDigitBtn, self.DIGIT_FILTER_STR))
|
|
|
|
|
- self.secondCharacterBtn.clicked.connect(partial(self.add_filter_str, self.secondCharacterBtn, self.CHARACTER_FILTER_STR))
|
|
|
|
|
|
|
+ self.secondCategoryBtn.clicked.connect(partial(self.add_filter_str, self.secondCategoryBtn, CATEGORY_FILTER_STR))
|
|
|
|
|
+ self.secondDigitBtn.clicked.connect(partial(self.add_filter_str, self.secondDigitBtn, DIGIT_FILTER_STR))
|
|
|
|
|
+ self.secondCharacterBtn.clicked.connect(partial(self.add_filter_str, self.secondCharacterBtn, CHARACTER_FILTER_STR))
|
|
|
self.secondFilterBtn.clicked.connect(partial(self.submit, self.secondFilterBtn))
|
|
self.secondFilterBtn.clicked.connect(partial(self.submit, self.secondFilterBtn))
|
|
|
|
|
|
|
|
- self.threeCategoryBtn.clicked.connect(partial(self.add_filter_str, self.threeCategoryBtn, self.CATEGORY_FILTER_STR))
|
|
|
|
|
- self.threeDigitBtn.clicked.connect(partial(self.add_filter_str, self.threeDigitBtn, self.DIGIT_FILTER_STR))
|
|
|
|
|
- self.threeCharacterBtn.clicked.connect(partial(self.add_filter_str, self.threeCharacterBtn, self.CHARACTER_FILTER_STR))
|
|
|
|
|
|
|
+ self.threeCategoryBtn.clicked.connect(partial(self.add_filter_str, self.threeCategoryBtn, CATEGORY_FILTER_STR))
|
|
|
|
|
+ self.threeDigitBtn.clicked.connect(partial(self.add_filter_str, self.threeDigitBtn, DIGIT_FILTER_STR))
|
|
|
|
|
+ self.threeCharacterBtn.clicked.connect(partial(self.add_filter_str, self.threeCharacterBtn, CHARACTER_FILTER_STR))
|
|
|
self.threeFilterBtn.clicked.connect(partial(self.submit, self.threeFilterBtn))
|
|
self.threeFilterBtn.clicked.connect(partial(self.submit, self.threeFilterBtn))
|
|
|
|
|
|
|
|
- self.fourCategoryBtn.clicked.connect(partial(self.add_filter_str, self.fourCategoryBtn, self.CATEGORY_FILTER_STR))
|
|
|
|
|
- self.fourDigitBtn.clicked.connect(partial(self.add_filter_str, self.fourDigitBtn, self.DIGIT_FILTER_STR))
|
|
|
|
|
- self.fourCharacterBtn.clicked.connect(partial(self.add_filter_str, self.fourCharacterBtn, self.CHARACTER_FILTER_STR))
|
|
|
|
|
|
|
+ self.fourCategoryBtn.clicked.connect(partial(self.add_filter_str, self.fourCategoryBtn, CATEGORY_FILTER_STR))
|
|
|
|
|
+ self.fourDigitBtn.clicked.connect(partial(self.add_filter_str, self.fourDigitBtn, DIGIT_FILTER_STR))
|
|
|
|
|
+ self.fourCharacterBtn.clicked.connect(partial(self.add_filter_str, self.fourCharacterBtn, CHARACTER_FILTER_STR))
|
|
|
self.fourFilterBtn.clicked.connect(partial(self.submit, self.fourFilterBtn))
|
|
self.fourFilterBtn.clicked.connect(partial(self.submit, self.fourFilterBtn))
|
|
|
|
|
|
|
|
|
|
+ def loadConfig(self):
|
|
|
|
|
+ if os.path.isfile(CONFIG_FILE_PATH):
|
|
|
|
|
+ with open(CONFIG_FILE_PATH, 'r', encoding='utf-8') as f:
|
|
|
|
|
+ config = json.loads(f.read())
|
|
|
|
|
+ self.filePathBox.setText(config[CONFIG_ITEM_LAST_SELECT_FILE_PATH])
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def selectFile(self):
|
|
def selectFile(self):
|
|
|
- file_path = QFileDialog.getOpenFileName(self, "选择文件")
|
|
|
|
|
- self.filePathBox.setText(file_path[0])
|
|
|
|
|
|
|
+ file_path, file_type = QFileDialog.getOpenFileName(self, "选择文件")
|
|
|
|
|
+ with open(CONFIG_FILE_PATH, 'w', encoding='utf-8') as f:
|
|
|
|
|
+ f.write(json.dumps({CONFIG_ITEM_LAST_SELECT_FILE_PATH: file_path}))
|
|
|
|
|
+ self.filePathBox.setText(file_path)
|
|
|
|
|
|
|
|
def add_filter_str(self, btn_widget: QPushButton, filter_text):
|
|
def add_filter_str(self, btn_widget: QPushButton, filter_text):
|
|
|
key_box = self.toolDict[btn_widget.objectName()]
|
|
key_box = self.toolDict[btn_widget.objectName()]
|
|
@@ -78,33 +92,14 @@ class MyMainForm(QWidget, Ui_Form):
|
|
|
|
|
|
|
|
def submit(self, filter_btn: QPushButton):
|
|
def submit(self, filter_btn: QPushButton):
|
|
|
|
|
|
|
|
- key_box, category_box, result_box, parent_result_box = self.resultDict[filter_btn.objectName()]
|
|
|
|
|
|
|
+ key_box, category_box, result_box, parent_result_box, result_label = self.resultDict[filter_btn.objectName()]
|
|
|
|
|
|
|
|
if not self.check(key_box, category_box, parent_result_box):
|
|
if not self.check(key_box, category_box, parent_result_box):
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
- key_text = key_box.text()
|
|
|
|
|
-
|
|
|
|
|
- originKeyArray = None
|
|
|
|
|
- if parent_result_box is None:
|
|
|
|
|
- with open(self.filePathBox.text(), 'r', encoding='utf-8') as f:
|
|
|
|
|
- originKeyArray = [content.replace("\n", "") for content in f.readlines()]
|
|
|
|
|
- else:
|
|
|
|
|
- originKeyArray = parent_result_box.toPlainText().split("\n")
|
|
|
|
|
-
|
|
|
|
|
- resultKeyArray = []
|
|
|
|
|
- if category_pattern.search(key_text) is not None:
|
|
|
|
|
- categoryKeyArray = category_box.toPlainText().splitlines()
|
|
|
|
|
- for categoryKey in categoryKeyArray:
|
|
|
|
|
- resultKeyArray.extend(self.filter(originKeyArray, key_text, "类别", categoryKey))
|
|
|
|
|
- elif digit_pattern.search(key_text) is not None:
|
|
|
|
|
- resultKeyArray.extend(self.filter(originKeyArray, key_text, "数字", "0-9"))
|
|
|
|
|
- elif english_pattern.search(key_text) is not None:
|
|
|
|
|
- resultKeyArray.extend(self.filter(originKeyArray, key_text, "字母", "A-Za-z"))
|
|
|
|
|
- else:
|
|
|
|
|
- resultKeyArray.extend(self.filter(originKeyArray, key_text))
|
|
|
|
|
-
|
|
|
|
|
- result_box.setText("\n".join(resultKeyArray))
|
|
|
|
|
|
|
+ before_filter_cnt, after_filter_cnt, filter_result_arr = self.deal(key_box, category_box, parent_result_box)
|
|
|
|
|
+ result_label.setText("提取结果:原始数据%s条,筛选后%s条" % (before_filter_cnt, after_filter_cnt))
|
|
|
|
|
+ result_box.setText("\n".join(filter_result_arr))
|
|
|
|
|
|
|
|
def check(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
|
|
def check(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
|
|
|
key_text = key_box.text()
|
|
key_text = key_box.text()
|
|
@@ -136,6 +131,31 @@ class MyMainForm(QWidget, Ui_Form):
|
|
|
|
|
|
|
|
return True
|
|
return True
|
|
|
|
|
|
|
|
|
|
+ def deal(self, key_box: QLineEdit, category_box: QTextEdit, parent_result_box: QTextBrowser):
|
|
|
|
|
+ key_text = key_box.text()
|
|
|
|
|
+
|
|
|
|
|
+ parent_key_arr = None
|
|
|
|
|
+ if parent_result_box is None:
|
|
|
|
|
+ with open(self.filePathBox.text(), 'r', encoding='utf-8') as f:
|
|
|
|
|
+ parent_key_arr = [content.replace("\n", "") for content in f.readlines()]
|
|
|
|
|
+ else:
|
|
|
|
|
+ parent_key_arr = parent_result_box.toPlainText().split("\n")
|
|
|
|
|
+
|
|
|
|
|
+ filter_result_arr = None
|
|
|
|
|
+ if category_pattern.search(key_text) is not None:
|
|
|
|
|
+ filter_result_arr = set()
|
|
|
|
|
+ categoryKeyArray = category_box.toPlainText().splitlines()
|
|
|
|
|
+ for categoryKey in categoryKeyArray:
|
|
|
|
|
+ filter_result_arr.update(self.filter(parent_key_arr, key_text, "类别", categoryKey))
|
|
|
|
|
+ elif digit_pattern.search(key_text) is not None:
|
|
|
|
|
+ filter_result_arr = self.filter(parent_key_arr, key_text, "数字", "0-9")
|
|
|
|
|
+ elif english_pattern.search(key_text) is not None:
|
|
|
|
|
+ filter_result_arr = self.filter(parent_key_arr, key_text, "字母", "A-Za-z")
|
|
|
|
|
+ else:
|
|
|
|
|
+ filter_result_arr = self.filter(parent_key_arr, key_text)
|
|
|
|
|
+
|
|
|
|
|
+ return len(parent_key_arr), len(filter_result_arr), filter_result_arr
|
|
|
|
|
+
|
|
|
def filter(self, originArray, inputText, oldStr=None, newStr=None):
|
|
def filter(self, originArray, inputText, oldStr=None, newStr=None):
|
|
|
resultArray = []
|
|
resultArray = []
|
|
|
key_pattern = None
|
|
key_pattern = None
|