-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathiTutor.py
80 lines (65 loc) · 2.37 KB
/
iTutor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from ast import keyword
import time
from unittest import result
import jieba
import wordcloud
import xlrd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
# Search parameters, read from the first worksheet of the settings workbook.
# Row index 1 / column 0 is used as the author, row index 1 / column 1 as the
# work unit.
# NOTE: a year range (end_year = 2022, begin_year taken from input()) was
# sketched in the original script but is disabled.
settings_book = xlrd.open_workbook("iTutor_setting/search.xls")
first_sheet = settings_book.sheet_by_index(0)
author, work_unit = (
    first_sheet.cell_value(rowx=1, colx=column) for column in (0, 1)
)
# Start Chrome through the bundled chromedriver and open the CNKI
# advanced-search page.
wd = webdriver.Chrome(service=Service(r"iTutor_tool/chromedriver"))
wd.get('https://kns.cnki.net/kns8/AdvSearch?dbcode=CFLS')
# Implicit wait of 2 applied to the element lookups that follow.
wd.implicitly_wait(2)

# Switch to the "professional search" tab.
wd.find_element(By.CSS_SELECTOR, 'li[name="majorSearch"]').click()

# Type the search expression into the professional-search text area.
# The expression has the form: AU % '<author>' AND AF % '<work unit>'
query_box = wd.find_element(By.CSS_SELECTOR, '.textarea-major')
search_text = "AU % '" + author + "' AND AF % '" + work_unit + "'"
query_box.send_keys(search_text)

# Submit the search.
search_button = wd.find_element(By.CSS_SELECTOR, '.btn-search')
search_button.click()
# Walk through every page of the result list and gather the text of each
# element with class "name" inside the result table (presumably the paper
# titles on CNKI -- confirm against the page markup).
# NOTE: the original also sketched lookups for the "data" and "author" classes
# (author information may be needed in the future); they are not collected.
# NOTE(review): StaleElementReferenceException is imported by the file but is
# intentionally not handled here, matching the original (its handler was
# commented out).
name_texts = []
try:
    while True:
        result_table = wd.find_element(By.CLASS_NAME, 'result-table-list')
        name_texts.extend(
            cell.text
            for cell in result_table.find_elements(By.CLASS_NAME, 'name')
        )

        # Pause before looking for the pager; presumably gives the page time
        # to settle -- confirm whether an explicit wait would be preferable.
        time.sleep(1)
        try:
            next_button = wd.find_element(By.ID, 'PageNext')
        except NoSuchElementException:
            # No "next page" control: this was the last page.
            break
        next_button.click()
        time.sleep(1)
finally:
    # Always shut the browser down, even when scraping fails part-way,
    # so no Chrome/chromedriver process is left behind.
    wd.quit()

# Every collected text is prefixed with a single space, exactly as the
# original incremental concatenation produced.
author_output = "".join(" " + text for text in name_texts)
# Word segmentation with jieba: load the project's user dictionary, cut the
# collected text into tokens, and re-join the tokens with single spaces.
jieba.load_userdict(r"iTutor_setting/jieba_dict.txt")
segmented_tokens = jieba.lcut(author_output)
words_output = ' '.join(segmented_tokens)
# Word cloud rendering.
# Start from wordcloud's built-in stop words, but extend a private copy so the
# library-wide STOPWORDS set is not mutated.
stopwords = set(wordcloud.STOPWORDS)

# Read the custom stop words, one entry per line. The context manager closes
# the file even if reading fails.
with open("iTutor_setting/stopwords.txt", "rt", encoding='utf-8') as add_stopwords:
    for line in add_stopwords:
        # Remove only the line terminator: slicing off the last character
        # unconditionally would truncate a final line without a newline.
        stopword = line.rstrip("\n")
        if stopword:
            stopwords.add(stopword)

wcloud = wordcloud.WordCloud(
    font_path="iTutor_setting/font.ttf",
    width=1920,
    height=1080,
    background_color="white",
    max_words=300,
    stopwords=stopwords,
)
wcloud.generate(words_output)
wcloud.to_file("outfile.png")