package top.zhixinghe1.money.agg.entity; import org.apache.commons.text.similarity.CosineSimilarity; import java.util.ArrayList; import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.LinkedBlockingQueue; /** * 计算任务对象 */ public class CalRunable implements Runnable { private int start; private int end; private Map wordCache = new HashMap(); private Map> indexCache = new HashMap(); private BitSet bitmap = null; private CosineSimilarity cosineSimilarity = new CosineSimilarity(); private Double aggThreshold = 0.8; private LinkedBlockingQueue queue; private Set indexSet = new HashSet<>(); private List result = new ArrayList<>(); public CalRunable(int start, int end, Map wordCache, Map> indexCache, BitSet bitmap, LinkedBlockingQueue queue) { this.start = start; this.end = end; this.wordCache = wordCache; this.indexCache = indexCache; this.bitmap = bitmap; this.queue = queue; } @Override public void run() { try { for (int i = start; i <= end; i++) { CalResult calResult = null; if (cal(i)) { calResult = new CalResult(true, new ArrayList<>(result)); } else { calResult = new CalResult(false, null); } calResult.setEndStatus(i == end); queue.put(calResult); } } catch (Exception e) { e.printStackTrace(); } } private boolean cal(int i) { // 判断是否已进行计算 if (bitmap.get(i)) { return false; } // 清除上一轮的数据 indexSet.clear(); result.clear(); Word word = wordCache.get(i); if (Objects.isNull(word.getStemMap()) || word.getStemMap().size() == 0) { return false; } bitmap.set(i, true); result.add(word.getKey()); for (CharSequence stem : word.getStemMap().keySet()) { Set positions = indexCache.get(stem); for (Integer position : positions) { if (bitmap.get(position)) { positions.remove(position); } else { indexSet.add(position); } } } for (Integer index : indexSet) { Word candicateWord = wordCache.get(index); if (Objects.isNull(candicateWord.getStemMap())) { continue; } Double v = cosineSimilarity.cosineSimilarity(word.getStemMap(), candicateWord.getStemMap()); if (v < aggThreshold) { continue; } result.add(candicateWord.getKey()); } // 输出计算结果 return result.size() > 1; } }