sql
CREATE TABLE raw_texts (
id INT AUTO_INCREMENT PRIMARY KEY,
content TEXT NOT NULL,
source VARCHAR(100), 数据来源(如新闻网站、用户评论等)
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
2、关键词表(`keywords`)
sql
CREATE TABLE keywords (
id INT AUTO_INCREMENT PRIMARY KEY,
keyword VARCHAR(100) NOT NULL UNIQUE, 关键词唯一
category VARCHAR(50) DEFAULT 'general' 可选分类(如政治、科技等)
);
3、关键词出现记录表(`keyword_occurrences`)
sql
CREATE TABLE keyword_occurrences (
id INT AUTO_INCREMENT PRIMARY KEY,
text_id INT NOT NULL,
keyword_id INT NOT NULL,
frequency INT NOT NULL, 在该文本中出现次数
position TEXT, 出现位置(可选,如 12,45,78)
FOREIGN KEY (text_id) REFERENCES raw_texts(id) ON DELETE CASCADE,
FOREIGN KEY (keyword_id) REFERENCES keywords(id) ON DELETE CASCADE,
INDEX idx_text_keyword (text_id, keyword_id)
);
2、提取关键词
words = extract_keywords(text)
word_counts = Counter(words)
for word, freq in word_counts.items():
2.1 插入关键词(若不存在)
cursor.execute(
INSERT IGNORE INTO keywords (keyword) VALUES (%s),
(word,)
)
cursor.execute(SELECT id FROM keywords WHERE keyword = %s, (word,))
keyword_id = cursor.fetchone()[0]
sql
SELECT k.keyword, SUM(o.frequency) AS total_freq
FROM keywords k
JOIN keyword_occurrences o ON k.id = o.keyword_id
GROUP BY k.id
ORDER BY total_freq DESC
LIMIT 10;
2、查询某关键词在最近7天的趋势
sql
SELECT DATE(r.created_at) AS date_day, SUM(o.frequency) AS daily_freq
FROM keyword_occurrences o
JOIN raw_texts r ON o.text_id = r.id
JOIN keywords k ON o.keyword_id = k.id
WHERE k.keyword = '人工智能'
AND r.created_at = CURDATE() INTERVAL 7 DAY
GROUP BY date_day
ORDER BY date_day;