| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- package top.zhixinghe1.money.agg.entity;
- import org.apache.commons.text.similarity.CosineSimilarity;
- import java.util.ArrayList;
- import java.util.BitSet;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Objects;
- import java.util.Set;
- import java.util.concurrent.LinkedBlockingQueue;
- /**
- * 计算任务对象
- */
- public class CalRunable implements Runnable {
- private int start;
- private int end;
- private Map<Integer, Word> wordCache = new HashMap();
- private Map<String, Set<Integer>> indexCache = new HashMap();
- private BitSet bitmap = null;
- private CosineSimilarity cosineSimilarity = new CosineSimilarity();
- private Double aggThreshold = 0.8;
- private LinkedBlockingQueue<CalResult> queue;
- private Set<Integer> indexSet = new HashSet<>();
- private List<String> result = new ArrayList<>();
- public CalRunable(int start, int end, Map<Integer, Word> wordCache, Map<String, Set<Integer>> indexCache, BitSet bitmap, LinkedBlockingQueue<CalResult> queue) {
- this.start = start;
- this.end = end;
- this.wordCache = wordCache;
- this.indexCache = indexCache;
- this.bitmap = bitmap;
- this.queue = queue;
- }
- @Override
- public void run() {
- try {
- for (int i = start; i <= end; i++) {
- CalResult calResult = null;
- if (cal(i)) {
- calResult = new CalResult(true, new ArrayList<>(result));
- } else {
- calResult = new CalResult(false, null);
- }
- calResult.setEndStatus(i == end);
- queue.put(calResult);
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- private boolean cal(int i) {
- // 判断是否已进行计算
- if (bitmap.get(i)) {
- return false;
- }
- // 清除上一轮的数据
- indexSet.clear();
- result.clear();
- Word word = wordCache.get(i);
- if (Objects.isNull(word.getStemMap()) || word.getStemMap().size() == 0) {
- return false;
- }
- bitmap.set(i, true);
- result.add(word.getKey());
- for (CharSequence stem : word.getStemMap().keySet()) {
- Set<Integer> positions = indexCache.get(stem);
- for (Integer position : positions) {
- if (bitmap.get(position)) {
- positions.remove(position);
- } else {
- indexSet.add(position);
- }
- }
- }
- for (Integer index : indexSet) {
- Word candicateWord = wordCache.get(index);
- if (Objects.isNull(candicateWord.getStemMap())) {
- continue;
- }
- Double v = cosineSimilarity.cosineSimilarity(word.getStemMap(), candicateWord.getStemMap());
- if (v < aggThreshold) {
- continue;
- }
- result.add(candicateWord.getKey());
- }
- // 输出计算结果
- return result.size() > 1;
- }
- }
|