|
@@ -1,14 +1,17 @@
|
|
|
package top.zhixinghe1.money;
|
|
package top.zhixinghe1.money;
|
|
|
|
|
|
|
|
|
|
+import me.tongfei.progressbar.ProgressBar;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.BufferedInputStream;
|
|
|
import java.io.BufferedOutputStream;
|
|
import java.io.BufferedOutputStream;
|
|
|
import java.io.BufferedReader;
|
|
import java.io.BufferedReader;
|
|
|
|
|
+import java.io.BufferedWriter;
|
|
|
import java.io.File;
|
|
import java.io.File;
|
|
|
import java.io.FileInputStream;
|
|
import java.io.FileInputStream;
|
|
|
import java.io.FileOutputStream;
|
|
import java.io.FileOutputStream;
|
|
|
import java.io.FileReader;
|
|
import java.io.FileReader;
|
|
|
|
|
+import java.io.FileWriter;
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
|
import java.text.SimpleDateFormat;
|
|
import java.text.SimpleDateFormat;
|
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
@@ -24,6 +27,7 @@ import java.util.Set;
|
|
|
import java.util.concurrent.CopyOnWriteArraySet;
|
|
import java.util.concurrent.CopyOnWriteArraySet;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
import java.util.concurrent.ExecutorService;
|
|
|
import java.util.concurrent.Executors;
|
|
import java.util.concurrent.Executors;
|
|
|
|
|
+import java.util.concurrent.LinkedBlockingQueue;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.function.Function;
|
|
import java.util.function.Function;
|
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Matcher;
|
|
@@ -167,32 +171,45 @@ public class AggApplication {
|
|
|
// 分割计算任务
|
|
// 分割计算任务
|
|
|
List<CalTask> calTasks = avgSplitTask(totalWord, perTaskNum);
|
|
List<CalTask> calTasks = avgSplitTask(totalWord, perTaskNum);
|
|
|
|
|
|
|
|
|
|
+ LinkedBlockingQueue<CalResult> queue = new LinkedBlockingQueue();
|
|
|
|
|
+
|
|
|
// 提交任务
|
|
// 提交任务
|
|
|
ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
|
ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
|
|
-// ExecutorService executorService = Executors.newFixedThreadPool(1);
|
|
|
|
|
for (CalTask calTask : calTasks) {
|
|
for (CalTask calTask : calTasks) {
|
|
|
- executorService.submit(new CalRunable(calTask.getStartPos(), calTask.getEndPos(), dataDirPath, wordCache, indexCache, bitmap));
|
|
|
|
|
|
|
+ executorService.submit(new CalRunable(calTask.getStartPos(), calTask.getEndPos(), dataDirPath, wordCache, indexCache, bitmap, queue));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 等待任务执行完成
|
|
|
|
|
- executorService.awaitTermination(12, TimeUnit.HOURS);
|
|
|
|
|
|
|
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
|
|
|
|
|
+ String aggFilePath = String.join(File.separator, dataDirPath, String.format("长尾词_合并_聚合_%s.txt", sdf.format(new Date())));
|
|
|
|
|
+ try (ProgressBar pb = new ProgressBar("文本聚合计算", totalWord);
|
|
|
|
|
+ FileWriter fileWriter = new FileWriter(aggFilePath);
|
|
|
|
|
+ BufferedWriter bufferedWriter = new BufferedWriter(fileWriter);) {
|
|
|
|
|
+ int taskNum = calTasks.size();
|
|
|
|
|
+ int currentTaskProgress = 0;
|
|
|
|
|
+ while (true) {
|
|
|
|
|
+ CalResult take = queue.take();
|
|
|
|
|
+ if (take.isAggStatus()) {
|
|
|
|
|
+ for (String word : take.getSimilarWords()) {
|
|
|
|
|
+ bufferedWriter.write(word);
|
|
|
|
|
+ bufferedWriter.write("\n");
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- // 合并计算结果
|
|
|
|
|
- List<File> aggResultFiles = Arrays.stream(dataDir.listFiles()).filter(file -> aggFilePattern.matcher(file.getName()).find()).collect(Collectors.toList());
|
|
|
|
|
- if (aggResultFiles.size() == 0) {
|
|
|
|
|
- System.out.println("没有找到任何计算分结果,任务结束");
|
|
|
|
|
- return;
|
|
|
|
|
- }
|
|
|
|
|
- String aggFilePath = String.join(File.separator, dataDirPath, "长尾词_合并_聚合.txt");
|
|
|
|
|
- try (FileOutputStream fileOutputStream = new FileOutputStream(aggFilePath);
|
|
|
|
|
- BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream)) {
|
|
|
|
|
- for (File aggResultFile : aggResultFiles) {
|
|
|
|
|
- try (FileInputStream fileInputStream = new FileInputStream(aggResultFile);
|
|
|
|
|
- BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream)) {
|
|
|
|
|
- bufferedOutputStream.write(bufferedInputStream.readAllBytes());
|
|
|
|
|
|
|
+ bufferedWriter.write("\n");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (take.isEndStatus()) {
|
|
|
|
|
+ currentTaskProgress ++ ;
|
|
|
|
|
+ }
|
|
|
|
|
+ // Advance the progress bar by one step for each result taken from the queue
|
|
|
|
|
+ pb.step();
|
|
|
|
|
+
|
|
|
|
|
+ if (taskNum == currentTaskProgress) {
|
|
|
|
|
+ break;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ executorService.awaitTermination(1, TimeUnit.MINUTES);
|
|
|
|
|
+ System.out.println("聚合任务执行完成");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
private static List<CalTask> avgSplitTask(int total, int internal) {
|
|
private static List<CalTask> avgSplitTask(int total, int internal) {
|