Browse Source

Java版金钱挖掘 项目新建

ChenYL 1 year ago
commit
de6505a529

+ 40 - 0
.gitignore

@@ -0,0 +1,40 @@
+target/
+!.mvn/wrapper/maven-wrapper.jar
+!**/src/main/**/target/
+!**/src/test/**/target/
+
+### IntelliJ IDEA ###
+.idea/modules.xml
+.idea/jarRepositories.xml
+.idea/compiler.xml
+.idea/libraries/
+*.iws
+*.iml
+*.ipr
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store
+
+# Matches any path named .idea, so it already covers the individual .idea/ entries in the IntelliJ IDEA section.
+.idea

+ 62 - 0
pom.xml

@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>top.zhixinghe1</groupId>
+    <artifactId>money-mining</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>21</maven.compiler.source>
+        <maven.compiler.target>21</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <!-- Text similarity calculation utilities -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-text</artifactId>
+            <version>1.11.0</version>
+        </dependency>
+        <!-- Command-line progress bar -->
+        <dependency>
+            <groupId>me.tongfei</groupId>
+            <artifactId>progressbar</artifactId>
+            <version>0.10.0</version>
+        </dependency>
+        <!-- Redis client -->
+        <dependency>
+            <groupId>redis.clients</groupId>
+            <artifactId>jedis</artifactId>
+            <version>5.1.0</version>
+        </dependency>
+    </dependencies>
+
+    <!-- Aliyun repository configuration (releases only, snapshots disabled) -->
+    <repositories>
+        <repository>
+            <id>aliyun-repos</id>
+            <url>https://maven.aliyun.com/repository/public</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+    <pluginRepositories>
+        <pluginRepository>
+            <id>aliyun-repos</id>
+            <url>https://maven.aliyun.com/repository/public</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </pluginRepository>
+    </pluginRepositories>
+</project>

+ 82 - 0
src/main/java/top/zhixinghe1/money/AggApplication.java

@@ -0,0 +1,82 @@
+package top.zhixinghe1.money;
+
+import redis.clients.jedis.Jedis;
+import redis.clients.jedis.JedisPool;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * 文本聚合 程序
+ */
+public class AggApplication {
+
+    public static void main(String[] args) {
+//        String dataDirPath = "D:\\Documents\\ChenYL\\CodeRepository\\money-mining-python\\data";
+        String dataDirPath = "D:\\Documents\\ChenYL\\CodeRepository\\money-mining-python\\data\\test";
+
+        // 判断传入路径是否有效
+        File dataDir = new File(dataDirPath);
+        if (!dataDir.exists() || !dataDir.isDirectory()) {
+            System.out.println(String.format("数据目录路径不存在,%s", dataDirPath));
+            return;
+        }
+
+        // 判断关键资源文件是否存在
+        List<String> fileNameList = Arrays.asList("长尾词_合并_分词.txt", "长尾词_合并_聚合.txt", "长尾词_合并.txt", "长尾词_合并_倒排索引.txt");
+        for (String fileName : fileNameList) {
+            String resFilePath = String.join(File.separator, dataDirPath, fileName);
+            File resfile = new File(resFilePath);
+            if (!resfile.exists() || !resfile.isFile()) {
+                System.out.println(String.format("文件不存在!文件路径:%s", resFilePath));
+                return;
+            }
+        }
+
+        // 归档历史数据文件
+        File[] files = dataDir.listFiles();
+        Pattern aggFilePattern = Pattern.compile("长尾词_合并_聚合_\\d+_\\d+.txt");
+        List<File> historyAggFile = Arrays.stream(files).filter(file -> {
+            Matcher matcher = aggFilePattern.matcher(file.getName());
+            return matcher.find();
+        }).collect(Collectors.toList());
+        if (Objects.nonNull(historyAggFile) || historyAggFile.size() > 0) {
+            SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
+            String archivePath = String.join(File.separator, dataDirPath, String.format("长尾词_聚合_归档_%s", sdf.format(new Date())));
+            File archiveDir = new File(archivePath);
+            archiveDir.mkdirs();
+            for (File historyFile : historyAggFile) {
+                String destPath = String.join(File.separator, archivePath, historyFile.getName());
+                System.out.println(destPath);
+                historyFile.renameTo(new File(destPath));
+            }
+        }
+
+        JedisPool pool = new JedisPool("127.0.0.1", 6379);
+        try (Jedis jedis = pool.getResource()) {
+            try (FileReader reader = new FileReader(String.join(File.separator, dataDirPath, "长尾词_合并_分词.txt"));
+                 BufferedReader br = new BufferedReader(reader)) {
+                String line = null;
+                while ((line = br.readLine()) != null) {
+                    System.out.println(line);
+                    System.out.println("暂停");
+                }
+
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+}

+ 11 - 0
src/main/java/top/zhixinghe1/money/AggCalConsumer.java

@@ -0,0 +1,11 @@
+package top.zhixinghe1.money;
+
+/**
+ * Text aggregation: computation.
+ *
+ * <p>Placeholder {@link Runnable}; {@link #run()} has no logic yet.
+ */
+public class AggCalConsumer implements Runnable{
+    @Override
+    public void run() {
+        // No logic yet; the body is empty.
+
+    }
+}

+ 11 - 0
src/main/java/top/zhixinghe1/money/AggCalProducer.java

@@ -0,0 +1,11 @@
+package top.zhixinghe1.money;
+
+/**
+ * Text aggregation: initiation.
+ *
+ * <p>Placeholder {@link Runnable}; {@link #run()} has no logic yet.
+ */
+public class AggCalProducer implements Runnable{
+    @Override
+    public void run() {
+        // No logic yet; the body is empty.
+
+    }
+}

+ 11 - 0
src/main/java/top/zhixinghe1/money/AggResultWriter.java

@@ -0,0 +1,11 @@
+package top.zhixinghe1.money;
+
+/**
+ * Text aggregation: result storage.
+ *
+ * <p>Placeholder {@link Runnable}; {@link #run()} has no logic yet.
+ */
+public class AggResultWriter implements Runnable{
+    @Override
+    public void run() {
+        // No logic yet; the body is empty.
+
+    }
+}

+ 138 - 0
src/main/java/top/zhixinghe1/money/Test.java

@@ -0,0 +1,138 @@
+package top.zhixinghe1.money;
+
+import me.tongfei.progressbar.ProgressBar;
+import org.apache.commons.text.similarity.CosineSimilarity;
+import redis.clients.jedis.Jedis;
+import redis.clients.jedis.JedisPool;
+import redis.clients.jedis.Pipeline;
+import redis.clients.jedis.Response;
+
+import java.io.File;
+import java.io.Reader;
+import java.text.SimpleDateFormat;
+import java.time.LocalTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.logging.SimpleFormatter;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+public class Test {
+    public static void main(String[] args) throws InterruptedException {
+//       "QQ邮箱格式怎么写" "QQ邮箱格式如何写", 相似度约0.8
+//        Map<CharSequence, Integer> leftVector = Arrays.asList("QQ", "邮箱", "格式", "怎么", "写").stream().collect(Collectors.toMap(v -> v, v -> 1));
+//        Map<CharSequence, Integer> rightVector = Arrays.asList("QQ", "邮箱", "格式", "如何", "写").stream().collect(Collectors.toMap(v -> v, v -> 1));
+//        CosineSimilarity cosineSimilarity = new CosineSimilarity();
+//        long start = System.currentTimeMillis();
+//        for (int i = 0; i < 300; i++) {
+//            Double v = cosineSimilarity.cosineSimilarity(leftVector, rightVector);
+//        }
+//        System.out.println(System.currentTimeMillis() - start);
+
+
+//        try (ProgressBar pb1 = new ProgressBar("文本聚合计算", 100)) {
+//            for (int i = 0; i < 100; i++) {
+//                Thread.sleep(1000);
+//                pb1.step();
+//            }
+//        }
+
+        JedisPool pool = new JedisPool("127.0.0.1", 6379);
+        try (Jedis jedis = pool.getResource()) {
+//            // Store & Retrieve a simple string
+//            jedis.set("foo", "bar");
+//            String foo = jedis.get("foo");
+//            System.out.println(); // prints bar
+//
+//            // Store & Retrieve a HashMap
+//            Map<String, String> hash = new HashMap<>();;
+//            hash.put("name", "John");
+//            hash.put("surname", "Smith");
+//            hash.put("company", "Redis");
+//            hash.put("age", "29");
+//            jedis.hset("user-session:123", hash);
+//            Map<String, String> stringStringMap = jedis.hgetAll("user-session:123");
+//            System.out.println(stringStringMap);
+//            // Prints: {name=John, surname=Smith, company=Redis, age=29}
+//
+//            // 核心:获取分词列表 进行相似度计算
+//            //长尾词获取
+//            String word = jedis.hget("word", "1");
+//            List<String> word1 = jedis.hmget("word", "1", "2");
+//            // 管道使用
+//            try (Pipeline pipelined = jedis.pipelined();) {
+//                // 分词List获取
+////                pipelined.lpush("testList", "QQ");
+////                pipelined.lpush("testList", "邮箱");
+////                pipelined.lpush("testList", "格式");
+////                pipelined.lpush("testList", "怎么");
+////                pipelined.lpush("testList", "写");
+////                pipelined.sync();
+//                List<String> testList = jedis.lrange("testList", 0L, -1L);
+//
+//                // 批量获取、list对象为空
+////                pipelined.lpush("testList2", "QQ");
+////                pipelined.lpush("testList2", "邮箱");
+////                pipelined.lpush("testList2", "格式");
+////                pipelined.lpush("testList2", "如何");
+////                pipelined.lpush("testList2", "写");
+//                pipelined.sync();
+//                pipelined.lrange("testList", 0L, -1L);
+//                pipelined.lrange("testList2", 0L, -1L);
+//                pipelined.lrange("testList3", 0L, -1L);
+//                List<Object> allResult =  pipelined.syncAndReturnAll();
+//
+//                // 倒排索引获取
+//                pipelined.sadd("testIndex1", "1", "2", "3", "4");
+//                pipelined.sadd("testIndex2", "2", "4", "5", "7");
+//                pipelined.sync();
+//                Set<String> sinter = jedis.sunion("testIndex1", "testIndex2");
+//
+//                // 删除set中指定元素
+//                long testIndex2 = jedis.srem("testIndex2", "4", "8");
+
+//                // 批量删除key
+//                Set<String> keys = jedis.keys("word*");
+//                String[] array = keys.toArray(String[]::new);
+//                jedis.del(array);
+//                System.out.println("暂停");
+//            }
+//            TODO bitmap使用
+            jedis.setbit("testBit", 32, true);
+            jedis.setbit("testBit", 1, true);
+            jedis.setbit("testBit", 15, true);
+            jedis.setbit("testBit", 27, true);
+            List<Integer> list = Arrays.asList(1, 15, 27, 32);
+            for (Integer p : list) {
+                boolean testBit = jedis.getbit("testBit", p);
+                if (testBit) {
+                    System.out.println(String.format("redis testBit 设置成功"));
+                } else {
+                    System.out.println(String.format("redis testBit 设置失败"));
+                }
+            }
+            BitSet bitSet = BitSet.valueOf(jedis.get("testBit").getBytes());
+            for (Integer p : list) {
+                boolean testBit = bitSet.get(p);
+                if (testBit) {
+                    System.out.println(String.format("bitset testBit 设置成功"));
+                } else {
+                    System.out.println(String.format("bitset testBit 设置失败"));
+                }
+            }
+            System.out.println("暂停");
+        }
+
+        System.out.println("暂停");
+    }
+}

+ 4 - 0
src/main/java/top/zhixinghe1/money/constant.java

@@ -0,0 +1,4 @@
+package top.zhixinghe1.money;
+
+/**
+ * Empty class; it declares no members yet.
+ *
+ * <p>NOTE(review): presumably intended to hold shared constants — confirm. The
+ * lowercase name departs from Java's UpperCamelCase convention for class names.
+ */
+public class constant {
+}