Kaynağa Gözat

完善代码结构

ChenYL 2 yıl önce
ebeveyn
işleme
9534a10a3c

+ 41 - 0
src/main/java/top/zhixinghe1/money/agg/entity/CalResult.java

@@ -0,0 +1,41 @@
+package top.zhixinghe1.money.agg.entity;
+
+import java.util.List;
+
+/**
+ * Outcome of processing a single index by a calculation task.
+ *
+ * <p>Carries three pieces of state: whether an aggregation was produced, the words that were
+ * aggregated, and whether this is the final result emitted by the producing task.
+ */
+public class CalResult {
+
+    /** {@code true} when this result represents an aggregation of similar words. */
+    private boolean aggStatus;
+
+    /** Words belonging to the aggregation; may be {@code null} when nothing was aggregated. */
+    private List<String> similarWords;
+
+    /** {@code true} when this is the last result of its task; defaults to {@code false}. */
+    private boolean endStatus;
+
+    /**
+     * Creates a result whose end status is initially {@code false}.
+     *
+     * @param aggStatus    whether an aggregation was produced
+     * @param similarWords the aggregated words (stored by reference, not copied); may be {@code null}
+     */
+    public CalResult(boolean aggStatus, List<String> similarWords) {
+        this.similarWords = similarWords;
+        this.aggStatus = aggStatus;
+    }
+
+    // ---- aggregation flag ----
+
+    /** @return whether an aggregation was produced */
+    public boolean isAggStatus() {
+        return this.aggStatus;
+    }
+
+    /** @param aggStatus whether an aggregation was produced */
+    public void setAggStatus(boolean aggStatus) {
+        this.aggStatus = aggStatus;
+    }
+
+    // ---- aggregated words ----
+
+    /** @return the aggregated words, exactly as stored (possibly {@code null}) */
+    public List<String> getSimilarWords() {
+        return this.similarWords;
+    }
+
+    /** @param similarWords the aggregated words (stored by reference) */
+    public void setSimilarWords(List<String> similarWords) {
+        this.similarWords = similarWords;
+    }
+
+    // ---- end-of-task marker ----
+
+    /** @return whether this is the last result emitted by its task */
+    public boolean isEndStatus() {
+        return this.endStatus;
+    }
+
+    /** @param endStatus whether this is the last result emitted by its task */
+    public void setEndStatus(boolean endStatus) {
+        this.endStatus = endStatus;
+    }
+}

+ 106 - 0
src/main/java/top/zhixinghe1/money/agg/entity/CalRunable.java

@@ -0,0 +1,106 @@
+package top.zhixinghe1.money.agg.entity;
+
+import org.apache.commons.text.similarity.CosineSimilarity;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * 计算任务对象
+ */
+public class CalRunable implements Runnable {
+
+    private int start;
+
+    private int end;
+
+    private Map<Integer, Word> wordCache = new HashMap();
+
+    private Map<String, Set<Integer>> indexCache = new HashMap();
+
+    private BitSet bitmap = null;
+
+    private CosineSimilarity cosineSimilarity = new CosineSimilarity();
+
+    private Double aggThreshold = 0.8;
+
+    private LinkedBlockingQueue<CalResult> queue;
+
+    private Set<Integer> indexSet = new HashSet<>();
+    private List<String> result = new ArrayList<>();
+
+    public CalRunable(int start, int end, Map<Integer, Word> wordCache, Map<String, Set<Integer>> indexCache, BitSet bitmap, LinkedBlockingQueue<CalResult> queue) {
+        this.start = start;
+        this.end = end;
+        this.wordCache = wordCache;
+        this.indexCache = indexCache;
+        this.bitmap = bitmap;
+        this.queue = queue;
+    }
+
+    @Override
+    public void run() {
+        try {
+            for (int i = start; i <= end; i++) {
+                CalResult calResult = null;
+                if (cal(i)) {
+                    calResult = new CalResult(true, new ArrayList<>(result));
+                } else {
+                    calResult = new CalResult(false, null);
+                }
+                calResult.setEndStatus(i == end);
+                queue.put(calResult);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    private boolean cal(int i) {
+        // 判断是否已进行计算
+        if (bitmap.get(i)) {
+            return false;
+        }
+
+        // 清除上一轮的数据
+        indexSet.clear();
+        result.clear();
+
+        Word word = wordCache.get(i);
+        if (Objects.isNull(word.getStemMap()) || word.getStemMap().size() == 0) {
+            return false;
+        }
+        bitmap.set(i, true);
+        result.add(word.getKey());
+        for (CharSequence stem : word.getStemMap().keySet()) {
+            Set<Integer> positions = indexCache.get(stem);
+            for (Integer position : positions) {
+                if (bitmap.get(position)) {
+                    positions.remove(position);
+                } else {
+                    indexSet.add(position);
+                }
+            }
+        }
+        for (Integer index : indexSet) {
+            Word candicateWord = wordCache.get(index);
+            if (Objects.isNull(candicateWord.getStemMap())) {
+                continue;
+            }
+            Double v = cosineSimilarity.cosineSimilarity(word.getStemMap(), candicateWord.getStemMap());
+            if (v < aggThreshold) {
+                continue;
+            }
+            result.add(candicateWord.getKey());
+        }
+        // 输出计算结果
+        return result.size() > 1;
+    }
+}

+ 37 - 0
src/main/java/top/zhixinghe1/money/agg/entity/CalTask.java

@@ -0,0 +1,37 @@
+package top.zhixinghe1.money.agg.entity;
+
+import java.io.Serial;
+import java.io.Serializable;
+
+/**
+ * Boundaries of a calculation task, expressed as a closed interval: both the start and the end
+ * position are inclusive.
+ */
+public class CalTask implements Serializable {
+
+    @Serial
+    private static final long serialVersionUID = 6711062995204035815L;
+
+    /** Left boundary (inclusive). */
+    private int startPos;
+
+    /** Right boundary (inclusive). */
+    private int endPos;
+
+    /**
+     * @param startPos left boundary (inclusive)
+     * @param endPos   right boundary (inclusive)
+     */
+    public CalTask(int startPos, int endPos) {
+        this.endPos = endPos;
+        this.startPos = startPos;
+    }
+
+    /** @return the left boundary (inclusive) */
+    public int getStartPos() {
+        return this.startPos;
+    }
+
+    /** @return the right boundary (inclusive) */
+    public int getEndPos() {
+        return this.endPos;
+    }
+
+    /** @param startPos the new left boundary (inclusive) */
+    public void setStartPos(int startPos) {
+        this.startPos = startPos;
+    }
+
+    /** @param endPos the new right boundary (inclusive) */
+    public void setEndPos(int endPos) {
+        this.endPos = endPos;
+    }
+}

+ 45 - 0
src/main/java/top/zhixinghe1/money/agg/entity/Word.java

@@ -0,0 +1,45 @@
+package top.zhixinghe1.money.agg.entity;
+
+import java.io.Serial;
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * A long-tail keyword together with its segmentation result.
+ */
+public class Word implements Serializable {
+
+    @Serial
+    private static final long serialVersionUID = 888376712090774661L;
+
+    /** The long-tail keyword text. */
+    private String key;
+
+    /**
+     * Segmented stems of the keyword mapped to an integer value.
+     * NOTE(review): the value is presumably the stem's occurrence count; confirm against the
+     * code that builds this map.
+     */
+    private Map<CharSequence, Integer> stemMap;
+
+    /**
+     * @param key     the long-tail keyword text
+     * @param stemMap the stem map (stored by reference, not copied); may be {@code null}
+     */
+    public Word(String key, Map<CharSequence, Integer> stemMap) {
+        this.stemMap = stemMap;
+        this.key = key;
+    }
+
+    /** @return the long-tail keyword text */
+    public String getKey() {
+        return this.key;
+    }
+
+    /** @return the stem map, exactly as stored (possibly {@code null}) */
+    public Map<CharSequence, Integer> getStemMap() {
+        return this.stemMap;
+    }
+
+    /** @param key the long-tail keyword text */
+    public void setKey(String key) {
+        this.key = key;
+    }
+
+    /** @param stemMap the stem map (stored by reference) */
+    public void setStemMap(Map<CharSequence, Integer> stemMap) {
+        this.stemMap = stemMap;
+    }
+}