|
|
@@ -1,5 +1,6 @@
|
|
|
package top.zhixinghe1.money.agg;
|
|
|
|
|
|
+import cn.hutool.core.math.Combination;
|
|
|
import org.apache.commons.text.similarity.CosineSimilarity;
|
|
|
import org.roaringbitmap.RoaringBitmap;
|
|
|
import top.zhixinghe1.money.agg.entity.CalInfo;
|
|
|
@@ -11,6 +12,7 @@ import top.zhixinghe1.money.agg.entity.Word;
|
|
|
import java.math.BigDecimal;
|
|
|
import java.math.RoundingMode;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.Collections;
|
|
|
import java.util.List;
|
|
|
import java.util.Objects;
|
|
|
import java.util.concurrent.LinkedBlockingQueue;
|
|
|
@@ -55,7 +57,8 @@ public class CalRunnable implements Runnable {
|
|
|
// 计算聚合结果
|
|
|
List<String> result = cal(pos);
|
|
|
// 返回计算结果
|
|
|
- CalResult calResult = Objects.nonNull(result) ? new CalResult(true, result) : new CalResult(false, null);
|
|
|
+ CalResult calResult = result.size() >= calInfo.getAggResultThreshold() ? new CalResult(true, result) : new CalResult(false, null);
|
|
|
+ calResult.setAggNum(result.size() == 0 ? 1 : result.size());
|
|
|
calResult.setEndStatus(pos == end);
|
|
|
queue.put(calResult);
|
|
|
}
|
|
|
@@ -71,22 +74,26 @@ public class CalRunnable implements Runnable {
|
|
|
*/
|
|
|
private List<String> cal(int pos) {
|
|
|
// 判断是否已进行计算
|
|
|
- if (!calResource.checkAndSetCalStatus(pos)) {
|
|
|
- return null;
|
|
|
+ if (calResource.checkAndSetCalStatus(pos)) {
|
|
|
+ return Collections.EMPTY_LIST;
|
|
|
}
|
|
|
|
|
|
// 获取主词
|
|
|
Word word = calResource.getWord(pos);
|
|
|
if (Objects.isNull(word.getStemMap()) || word.getStemMap().size() == 0) {
|
|
|
- return null;
|
|
|
+ return Collections.EMPTY_LIST;
|
|
|
}
|
|
|
|
|
|
- // 计算候选词位图
|
|
|
- RoaringBitmap finalBitmap = new RoaringBitmap();
|
|
|
- for (CharSequence stem : word.getStemMap().keySet()) {
|
|
|
- RoaringBitmap stemBitmap = calResource.getWordBitmap((String) stem);
|
|
|
- finalBitmap.or(stemBitmap);
|
|
|
+ // 计算候选词位图,组合下具有相关的关键词才进行计算
|
|
|
+ Combination combination = new Combination(word.getStemMap().keySet().toArray(String[]::new));
|
|
|
+ List<String[]> select = combination.select(2);
|
|
|
+ List<RoaringBitmap> andBitmapList = new ArrayList<>(select.size());
|
|
|
+ for (String[] strings : select) {
|
|
|
+ RoaringBitmap firstBitmap = calResource.getWordBitmap(strings[0]);
|
|
|
+ RoaringBitmap secondBitmap = calResource.getWordBitmap(strings[1]);
|
|
|
+ andBitmapList.add(RoaringBitmap.and(firstBitmap, secondBitmap));
|
|
|
}
|
|
|
+ RoaringBitmap finalBitmap = RoaringBitmap.or(andBitmapList.iterator());
|
|
|
finalBitmap.andNot(calResource.getUsedBitmap());
|
|
|
|
|
|
// 设置主词
|
|
|
@@ -109,7 +116,6 @@ public class CalRunnable implements Runnable {
|
|
|
result.add(candidateWord.getKey());
|
|
|
}
|
|
|
|
|
|
- // 输出计算结果
|
|
|
- return result.size() >= calInfo.getAggResultThreshold() ? result : null;
|
|
|
+ return result;
|
|
|
}
|
|
|
}
|