|
|
@@ -0,0 +1,106 @@
|
|
|
+package top.zhixinghe1.money.agg.entity;
|
|
|
+
|
|
|
+import org.apache.commons.text.similarity.CosineSimilarity;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.BitSet;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Objects;
|
|
|
+import java.util.Set;
|
|
|
+import java.util.concurrent.LinkedBlockingQueue;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 计算任务对象
|
|
|
+ */
|
|
|
+public class CalRunable implements Runnable {
|
|
|
+
|
|
|
+ private int start;
|
|
|
+
|
|
|
+ private int end;
|
|
|
+
|
|
|
+ private Map<Integer, Word> wordCache = new HashMap();
|
|
|
+
|
|
|
+ private Map<String, Set<Integer>> indexCache = new HashMap();
|
|
|
+
|
|
|
+ private BitSet bitmap = null;
|
|
|
+
|
|
|
+ private CosineSimilarity cosineSimilarity = new CosineSimilarity();
|
|
|
+
|
|
|
+ private Double aggThreshold = 0.8;
|
|
|
+
|
|
|
+ private LinkedBlockingQueue<CalResult> queue;
|
|
|
+
|
|
|
+ private Set<Integer> indexSet = new HashSet<>();
|
|
|
+ private List<String> result = new ArrayList<>();
|
|
|
+
|
|
|
+ public CalRunable(int start, int end, Map<Integer, Word> wordCache, Map<String, Set<Integer>> indexCache, BitSet bitmap, LinkedBlockingQueue<CalResult> queue) {
|
|
|
+ this.start = start;
|
|
|
+ this.end = end;
|
|
|
+ this.wordCache = wordCache;
|
|
|
+ this.indexCache = indexCache;
|
|
|
+ this.bitmap = bitmap;
|
|
|
+ this.queue = queue;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void run() {
|
|
|
+ try {
|
|
|
+ for (int i = start; i <= end; i++) {
|
|
|
+ CalResult calResult = null;
|
|
|
+ if (cal(i)) {
|
|
|
+ calResult = new CalResult(true, new ArrayList<>(result));
|
|
|
+ } else {
|
|
|
+ calResult = new CalResult(false, null);
|
|
|
+ }
|
|
|
+ calResult.setEndStatus(i == end);
|
|
|
+ queue.put(calResult);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean cal(int i) {
|
|
|
+ // 判断是否已进行计算
|
|
|
+ if (bitmap.get(i)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 清除上一轮的数据
|
|
|
+ indexSet.clear();
|
|
|
+ result.clear();
|
|
|
+
|
|
|
+ Word word = wordCache.get(i);
|
|
|
+ if (Objects.isNull(word.getStemMap()) || word.getStemMap().size() == 0) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ bitmap.set(i, true);
|
|
|
+ result.add(word.getKey());
|
|
|
+ for (CharSequence stem : word.getStemMap().keySet()) {
|
|
|
+ Set<Integer> positions = indexCache.get(stem);
|
|
|
+ for (Integer position : positions) {
|
|
|
+ if (bitmap.get(position)) {
|
|
|
+ positions.remove(position);
|
|
|
+ } else {
|
|
|
+ indexSet.add(position);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (Integer index : indexSet) {
|
|
|
+ Word candicateWord = wordCache.get(index);
|
|
|
+ if (Objects.isNull(candicateWord.getStemMap())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ Double v = cosineSimilarity.cosineSimilarity(word.getStemMap(), candicateWord.getStemMap());
|
|
|
+ if (v < aggThreshold) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ result.add(candicateWord.getKey());
|
|
|
+ }
|
|
|
+ // 输出计算结果
|
|
|
+ return result.size() > 1;
|
|
|
+ }
|
|
|
+}
|