{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Standard library first, then third-party.\n", "import os\n", "\n", "import pandas as pd\n", "\n", "# Input CSV that the next cell loads with pd.read_csv.\n", "ORIG_FILE = \"./data/范用性关键词-分词结果.csv\"\n", "# Not referenced in the cells visible here; judging by the file names these are\n", "# presumably output paths (stop words filtered / frequency above 300) - TODO confirm.\n", "DEST_FILE = \"./data/范用性关键词-分词结果-过滤停用词.csv\"\n", "DEST_FILE_FILTER = \"./data/范用性关键词-分词结果-过滤停用词-词频大于300.csv\"\n", "# Presumably the directory holding stop-word lists; verify where it is consumed.\n", "STOP_WORD_DIR = \"./data/stopwords\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read ORIG_FILE into a two-column frame labelled key / count.\n", "# Because names= is passed without header=, pandas treats the file's first line as data.\n", "df = pd.read_csv(\n", "    ORIG_FILE,\n", "    names=[\"key\", \"count\"],\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | key | \n", "count | \n", "
|---|---|---|
| 0 | \n", "怎么 | \n", "1051516 | \n", "
| 1 | \n", "的 | \n", "123009 | \n", "
| 2 | \n", "怎么办 | \n", "93937 | \n", "
| 3 | \n", "怎么样 | \n", "91070 | \n", "
| 4 | \n", "做 | \n", "63034 | \n", "
| ... | \n", "... | \n", "... | \n", "
| 116625 | \n", "做文 | \n", "1 | \n", "
| 116626 | \n", "提微商 | \n", "1 | \n", "
| 116627 | \n", "仰卧 | \n", "1 | \n", "
| 116628 | \n", "起坐 | \n", "1 | \n", "
| 116629 | \n", "仰卧起坐 | \n", "1 | \n", "
116630 rows × 2 columns
\n", "| \n", " | count | \n", "
|---|---|
| count | \n", "115534.000000 | \n", "
| mean | \n", "27.613802 | \n", "
| std | \n", "311.900416 | \n", "
| min | \n", "1.000000 | \n", "
| 25% | \n", "1.000000 | \n", "
| 50% | \n", "2.000000 | \n", "
| 75% | \n", "6.000000 | \n", "
| max | \n", "63034.000000 | \n", "