前言
以前偶然浏览到wallhaven网站,画质真的不错,加上当时已经接触到Python爬虫,所以就想着做一个脚本将图片下载下来,后来经过断断续续几个月的修改,实现了图片获取,壁纸切换,图片用后删除,每日更新等一系列的自动化,还对Python的多线程,多进程进行了一些研究和应用。最后解决了困扰了几个月的R18爬取问题。(原因是没登录,挺无语的…)
时隔一年,重新运行了一下程序,依旧那么丝滑(大概相当于吃了一吨德芙吧)。重新整理了一下思路,发出来供大家参考,因为能力有限,错误在所难免,望大佬指出。
准备工作
引入Python库
import ctypes #系统应用壁纸
import datetime #获取时间
import os #用于创建文件夹等操作
import random #随机选取壁纸
import socket #用于检测网络
import sys #用于退出脚本
import time #时间
import threading #多线程
import requests #爬取wallhaven网页
import getpass #获取用户名
from bs4 import BeautifulSoup #对爬取内容进行提取解析
from threading import Lock #进程锁
函数汇总
以下为实现此程序功能的所有函数
step1
获取概览页图片链接
def step1(url):  # collect the wallpaper-page links of one search-result page
    """Fetch one wallhaven search-result page and queue its wallpaper links.

    Side effects:
        * stores ``url`` in the global ``referer2`` (step2 sends it as referer);
        * in NSFW mode (``nsfw == 1``) logs in and stores the session in the
          global ``s`` so that step4 can hand it to step2;
        * appends every accepted wallpaper-page link to the global ``z1``.

    The account is read from the ``WALLHAVEN_USERNAME`` /
    ``WALLHAVEN_PASSWORD`` environment variables; when they are not set the
    placeholder values from the original script are used and must be edited.

    Args:
        url: Full search URL including the page number.

    Raises:
        SystemExit: when the page could not be fetched in two attempts.
    """
    global referer2, s
    referer2 = url

    # Block until the connectivity probe succeeds.  A non-zero status is
    # treated like an exception so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    # Two attempts in total: the second one makes the script more stable.
    rq = None
    for _attempt in range(2):
        try:
            if nsfw == 1:
                s = requests.session()
                # Every request gets a timeout so a stalled login cannot hang forever.
                login_page = s.get('https://wallhaven.cc/login', headers=h, timeout=5).content
                token_inputs = BeautifulSoup(login_page, 'lxml').select(
                    '#login > input[type=hidden]:nth-child(1)')
                data = {
                    'username': os.environ.get('WALLHAVEN_USERNAME', '******'),
                    'password': os.environ.get('WALLHAVEN_PASSWORD', '*******'),
                }
                for field in token_inputs:
                    # Hidden form token that the login form expects back.
                    data['_token'] = field['value']
                s.post('https://wallhaven.cc/auth/login', data, headers=h, timeout=5)
                rq = s.get(url, timeout=5)
            else:
                rq = requests.get(url, headers=h, timeout=5)
            break
        except requests.RequestException:
            rq = None
    if rq is None:
        print('与网站连接失败,请稍后重试')
        sys.exit()

    # Pick the wallpaper links out of the thumbnail grid.
    soup = BeautifulSoup(rq.content, 'lxml')
    if nsfw == 1:
        anchors = soup.select("div[id='thumbs']> section> ul > li > figure > a.preview[href]")
    else:
        anchors = soup.select("div[id='thumbs']> section> ul > li > figure > a[href]")
    for position, anchor in enumerate(anchors, start=1):
        nth = ':nth-child(' + str(position) + ')'
        if nsfw == 1:
            # In NSFW mode only the presence of the resolution label is required.
            if not soup.select("#thumbs > section > ul > li" + nth + " > figure > div > span.wall-res"):
                continue
        else:
            labels = soup.select("#thumbs > section > ul > li" + nth + " > figure > div > span")
            try:
                width, height = labels[0].text.split('x')[:2]
                ratio = int(width) / int(height)
            except (IndexError, ValueError, ZeroDivisionError):
                # Missing or malformed resolution label: skip this thumbnail
                # instead of aborting the whole run.
                continue
            # Aspect-ratio requirement: keep only ratios within 1.35 .. 2.1.
            if ratio < 1.35 or ratio > 2.1:
                continue
        z1.append(anchor["href"])
    print("\n此页预计获取%d张图片\n" % len(z1))
'''
Step2
获取图片源地址
def step2(url, s=None):  # open a wallpaper page, find the image source, download it
    """Open one wallpaper page and download its full-size image via step3.

    Side effects: writes ``referer2`` into the shared header dict ``h``,
    stores ``url`` in the global ``referer`` (used by step3) and increments
    the global failure counter ``id2`` when the page cannot be used.

    Args:
        url: Address of a single wallpaper page (an entry of ``z1``).
        s: Logged-in ``requests`` session; used only when ``nsfw == 1``.
    """
    global id2, id, referer
    h['referer'] = referer2

    # Block until the connectivity probe succeeds; a non-zero status also
    # waits, so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    try:
        if nsfw == 1:
            rq = s.get(url, timeout=5)
        else:
            rq = requests.get(url, headers=h, timeout=5)
    except (requests.RequestException, AttributeError):
        # AttributeError covers NSFW mode being called without a session.
        id2 = id2 + 1
        return
    print("正在获取第%d张图片" % id)

    page = BeautifulSoup(rq.content, 'lxml')
    images = page.select("img[id='wallpaper']")
    referer = url
    if not images:
        # No image element on the page: count it as a failure so the
        # statistics printed by main stay truthful.
        id2 = id2 + 1
        return
    for image in images:
        source = image["src"]
        extension = source.split('.')[-1]
        file_name = zone + "/壁纸/" + str(id) + "." + extension
        step3(source, file_name)
Step3
下载图片,嵌套于step2
def step3(url, file_name):  # download one image
    """Download the image at ``url`` and store it as ``file_name``.

    On success the global file counter ``id`` and the success counter
    ``id3`` are incremented; on a failed request (including an HTTP error
    status) the failure counter ``id2`` is incremented and nothing is written.

    Args:
        url: Direct address of the image file.
        file_name: Destination path of the downloaded file.
    """
    global id, id2, id3

    # Block until the connectivity probe succeeds; a non-zero status also
    # waits, so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36 Edg/86.0.622.48",
        'Referer': referer}
    try:
        response = requests.get(url, headers=headers, timeout=5)
        # Without this check an error page would be saved as a "wallpaper".
        response.raise_for_status()
    except requests.RequestException:
        id2 = id2 + 1
        return

    # The context manager closes the file even if the write fails.
    with open(file_name, 'wb') as target:
        target.write(response.content)
    id = id + 1
    id3 = id3 + 1
Step4
将前三步串联起来
def step4(index, categories, purity, resolution, frequency, type):  # run the pipeline
    """Download every accepted wallpaper of one search-result page.

    Builds the search URL, lets step1 fill the global ``z1`` with wallpaper
    links and then calls step2 for each of them.

    Args:
        index: Number of the result page to fetch.
        categories: Value of the ``categories`` query parameter.
        purity: Value of the ``purity`` query parameter.
        resolution: Value of the ``atleast`` query parameter.
        frequency: Value of the ``topRange`` query parameter.
        type: Value of the ``sorting`` query parameter.
    """
    url = ("https://wallhaven.cc/search?categories=" + categories
           + "&purity=" + purity
           + "&atleast=" + resolution
           + "&topRange=" + frequency
           + "&sorting=" + type
           + "&order=desc&page=" + str(index))
    step1(url)
    if len(z1) == 0:
        print("\n\n\n\n抱歉,已经是最后一页了哦,重新选择吧!\n")
        # Start over with a fresh round of questions.
        main()
        # NOTE(review): main() loops forever, so this is normally unreachable;
        # returning keeps links queued by the nested run from being fetched twice.
        return
    for link in z1:
        if nsfw == 1:
            step2(link, s)
        else:
            step2(link)
Internet
测试网络
def internet(host="www.baidu.com", port=443, timeout=3):
    """Probe connectivity with a single TCP connection attempt.

    Args:
        host: Host to connect to (default: the original probe target).
        port: TCP port to connect to.
        timeout: Socket timeout in seconds.

    Returns:
        0 when the connection succeeded, otherwise the error code reported
        by ``connect_ex``.

    Raises:
        OSError: for failures ``connect_ex`` does not turn into a code, for
            example ``socket.gaierror`` when the host cannot be resolved.
            Callers treat any exception as "offline".
    """
    # The context manager closes the probe socket; the original leaked one
    # socket per call.
    with socket.socket() as probe:
        probe.settimeout(timeout)
        return probe.connect_ex((host, port))
Thread
第一个线程,负责监听控制台输入,输入change时立即更换壁纸
def thread():
    """Console listener: apply a random wallpaper when the user types 'change'.

    Runs until the wallpaper folder holds fewer than two files; it then sets
    the global ``start`` to 1 (main uses that flag to start a new listener)
    and returns.  The applied file is deleted ten seconds after the switch.
    """
    global start
    SPI_SETDESKWALLPAPER = 20  # action code passed to SystemParametersInfoW
    folder = zone + "/壁纸"
    while True:
        command = input("输入'change'以立即切换壁纸\n")
        if command != 'change':
            continue

        filepath = None
        # "with" releases the lock even if listing or applying fails, so the
        # rotation thread can never be blocked forever.
        with lock:
            if not os.path.exists(folder):
                os.mkdir(folder)
            files = os.listdir(folder)
            # Original threshold kept: at least two files must be available.
            if len(files) > 1:
                filepath = folder + "/" + random.choice(files)
                ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, filepath, 0)

        if filepath is None:
            print('壁纸已用完,请等待下一次刷新')
            start = 1
            break

        print("壁纸更换成功!(十秒后可再次切换)\n")
        time.sleep(10)
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            # Best effort: the file may be in use or already removed.
            continue
Thread2
第二个线程,负责定时切换壁纸
def thread2(times):
    """Rotation loop: apply a random wallpaper every ``times`` minutes.

    Sets the global ``restart`` to 1 whenever a wallpaper was applied and to
    0 when the folder was empty (main reads this flag).  The applied file is
    deleted four seconds after the switch.

    Args:
        times: Rotation interval in minutes (anything ``int()`` accepts).
    """
    global restart
    SPI_SETDESKWALLPAPER = 20  # action code passed to SystemParametersInfoW
    folder = zone + "/壁纸"
    interval = int(times) * 60
    while True:
        chosen = None
        # "with" releases the lock even if listing or applying fails, so the
        # console listener can never be blocked forever.
        with lock:
            if not os.path.exists(folder):
                os.mkdir(folder)
            files = os.listdir(folder)
            if files:
                restart = 1
                chosen = random.choice(files)
                filepath = folder + "/" + chosen
                ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, filepath, 0)
            else:
                restart = 0

        if chosen is None:
            # Nothing to show yet: look again in ten seconds.
            time.sleep(10)
            continue

        print("\n提示:壁纸更换成功! %s已应用 当前时间:%s\n" % (chosen, datetime.datetime.now().time()))
        time.sleep(4)
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            # Best effort: the file may be in use or already removed.
            pass
        # The four seconds already slept count towards the interval.
        time.sleep(interval - 4)
Get_Background_Path
当前壁纸获取路径,因为有些人希望保存壁纸
def Get_Background_Path():
    """Return the Windows theme cache folder shown to the user at start-up.

    The script prints this path as the place where the current wallpaper can
    be found.  ``%APPDATA%`` is preferred so the result is also right for
    profiles that do not live under ``C:/Users``; without it the original
    ``C:/Users/<login name>/AppData/Roaming`` location is used.

    Returns:
        The folder path, written with forward slashes.
    """
    roaming = os.environ.get("APPDATA")
    if roaming:
        roaming = roaming.replace("\\", "/").rstrip("/")
    else:
        roaming = "C:/Users/" + getpass.getuser() + "/AppData/Roaming"
    return roaming + "/Microsoft/Windows/Themes/CachedFiles"
Main
主函数,设置选择界面,并将上述函数有机组合
def main():
    """Interactive entry point: ask for the settings, then keep the folder stocked.

    Flow:
        1. create the wallpaper folder if needed and wait for the network;
        2. read interval, sorting, minimum resolution, category, NSFW flag
           and start page from the console;
        3. empty the folder, start the rotation thread (normal mode only)
           and download the first page through step4;
        4. loop forever: fetch the next page when at most two files are left
           (or, in NSFW mode, when the user asks for it) and restart the
           console listener whenever it has stopped.

    Raises:
        ValueError: when the interval is not an integer.
        SystemExit: when the interval is not positive.
    """
    global z1, id2, id3, start, start2, nsfw, on
    z1.clear()
    purity = '110'
    folder = zone + "/壁纸"
    path = folder + "/"
    if not os.path.exists(folder):
        os.mkdir(folder)
        print("已创建文件夹", path)
    # NOTE(review): the source lost its indentation; these two messages are
    # assumed to be printed on every start - confirm.
    print("壁纸都会下载在这里哦")
    print("壁纸用后即删,不必担心储存空间")

    # Wait for the network; a non-zero probe status backs off as well.
    while True:
        print("检测网络中...\n")
        try:
            if internet() == 0:
                print("网络已连接\n")
                break
        except OSError:
            pass
        print("未联网,30秒后自动重试")
        time.sleep(30)

    times = input("请输入壁纸切换频率: (分钟) (输入正整数哦 默认20分钟)\n")
    if times == '':
        times = '20'
    times = int(times)
    if times <= 0:
        sys.exit()
    times = str(times)

    sorting = input("类型选择:热门/排行榜/随机/风景/日更 \n 根据文字选择: hot/toplist/random/views/date_added (例如 选择热门则输入hot 默认hot)\n")
    if sorting == '':
        sorting = 'hot'
    if sorting == "toplist":
        frequency = input("时间选择:日排行/三日排行/七日排行/月排行/近三月排行/近六月排行/年排行 \n 根据文字选择: 1d/3d/1w/1M/3M/6M/1y (不要输错哦) \n")
    else:
        frequency = ""

    resolution = input("最低清晰度设置 格式'xxxx'x'xxxx' 不限请输入'0x0' 默认为 1920x1080 (注意按格式输入)\n")
    if resolution == "":
        resolution = '1920x1080'

    categories = input("是否动漫模式,默认普通模式(y/n)\n")
    if categories == 'y':
        categories = '010'
    else:
        categories = '110'

    nsf = input('WARNING: r18? (Y/N) 注意:此模式需手动切换壁纸! \n')
    if nsf == 'Y':
        on = 0
        nsfw = 1
        if categories == '110':
            categories = '111'
        purity = '001'
    else:
        # main can be re-entered from step4; do not inherit an earlier 'Y'.
        nsfw = 0

    page = input("想要从第几页开始获取?(默认从第1页获取) \n")
    try:
        page = int(page)
    except ValueError:
        page = 1
    print("将从第%i页开始获取\n" % page)
    print("稍等,正在下载图片ing...")
    print("\n国外网站,下载可能较慢")

    for name in os.listdir(folder):  # empty the folder
        try:
            os.remove(path + name)
        except OSError:
            continue

    # The rotation thread is started once, and only in normal mode.
    if nsfw == 0 and start2 == 1:
        t2 = threading.Thread(target=thread2, args=(times,))
        t2.start()
        start2 = 0

    starttime = datetime.datetime.now()
    step4(page, categories, purity, resolution, frequency, sorting)
    print('%d张壁纸获取成功,%d张壁纸获取失败' % (id3 - 1, id2))
    id2 = 0
    id3 = 1
    endtime = datetime.datetime.now()
    print("初始化完成,用时%i秒" % (endtime - starttime).seconds)

    while True:  # keep the folder stocked
        if nsfw == 1:
            on = input('是否继续获取下一页(y/n)')
            if on == 'y':
                on = 1
        length = len(os.listdir(folder))
        if nsfw == 1 and on == 1:
            # The user asked for the next page: force a refill.
            length = 0
            on = 0  # was the no-op comparison "on ==0"
        if length <= 2:
            print("\n获取第%i页" % (page + 1))
            while True:
                print("检测网络中...")
                try:
                    if internet() == 0:
                        print("网络已连接")
                        break
                except OSError:
                    pass
                print("未联网,30秒后自动重试")
                time.sleep(30)
            page += 1
            z1.clear()
            starttime = datetime.datetime.now()
            step4(page, categories, purity, resolution, frequency, sorting)
            endtime = datetime.datetime.now()
            print('%d张壁纸获取成功,%d张壁纸获取失败 用时%i秒' % (id3 - 1, id2, (endtime - starttime).seconds))
            id2 = 0
            id3 = 1
        if nsfw == 0 and start == 1:
            # (Re)start the console listener; it sets start back to 1 when it stops.
            thread1 = threading.Thread(target=thread)
            thread1.start()
            start = 0
        if restart != 0:
            time.sleep(int(times) * 60)
        else:
            # The rotation thread reported an empty folder: poll again soon,
            # but without spinning at full speed.
            time.sleep(1)
代码全文
将以下代码用pyinstaller直接打包后即可使用(注意:不能选择后台运行!)
import ctypes
import datetime
import os
import random
import socket
import sys
import time
import threading
import requests
import getpass
from bs4 import BeautifulSoup
from threading import Lock
# Default request headers; step2 adds a 'referer' entry before it fetches a wallpaper page.
h = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36 Edg/86.0.622.48"}
# Wallpaper-page links collected by step1 for the current result page.
z1 = []
# Number used in the next downloaded file name; incremented by step3 after each saved file.
id = 1
# Failed downloads since main last reset the counters.
id2 = 0
# One more than the successful downloads since the last reset (main prints id3 - 1).
id3 = 1
# 1 until main has started the thread2 rotation thread, 0 afterwards.
start2=1
# Written by thread2: 1 after a wallpaper was applied, 0 when the folder was empty.
restart=1
# Held by thread and thread2 while they list the folder and apply a wallpaper.
lock=Lock()
# 1 once the user selected the NSFW mode in main, otherwise 0.
nsfw=0
def Get_Background_Path():
    """Return the Windows theme cache folder shown to the user at start-up.

    The script prints this path as the place where the current wallpaper can
    be found.  ``%APPDATA%`` is preferred so the result is also right for
    profiles that do not live under ``C:/Users``; without it the original
    ``C:/Users/<login name>/AppData/Roaming`` location is used.

    Returns:
        The folder path, written with forward slashes.
    """
    roaming = os.environ.get("APPDATA")
    if roaming:
        roaming = roaming.replace("\\", "/").rstrip("/")
    else:
        roaming = "C:/Users/" + getpass.getuser() + "/AppData/Roaming"
    return roaming + "/Microsoft/Windows/Themes/CachedFiles"
def thread():
    """Console listener: apply a random wallpaper when the user types 'change'.

    Runs until the wallpaper folder holds fewer than two files; it then sets
    the global ``start`` to 1 (main uses that flag to start a new listener)
    and returns.  The applied file is deleted ten seconds after the switch.
    """
    global start
    SPI_SETDESKWALLPAPER = 20  # action code passed to SystemParametersInfoW
    folder = zone + "/壁纸"
    while True:
        command = input("输入'change'以立即切换壁纸\n")
        if command != 'change':
            continue

        filepath = None
        # "with" releases the lock even if listing or applying fails, so the
        # rotation thread can never be blocked forever.
        with lock:
            if not os.path.exists(folder):
                os.mkdir(folder)
            files = os.listdir(folder)
            # Original threshold kept: at least two files must be available.
            if len(files) > 1:
                filepath = folder + "/" + random.choice(files)
                ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, filepath, 0)

        if filepath is None:
            print('壁纸已用完,请等待下一次刷新')
            start = 1
            break

        print("壁纸更换成功!(十秒后可再次切换)\n")
        time.sleep(10)
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            # Best effort: the file may be in use or already removed.
            continue
def thread2(times):
    """Rotation loop: apply a random wallpaper every ``times`` minutes.

    Sets the global ``restart`` to 1 whenever a wallpaper was applied and to
    0 when the folder was empty (main reads this flag).  The applied file is
    deleted four seconds after the switch.

    Args:
        times: Rotation interval in minutes (anything ``int()`` accepts).
    """
    global restart
    SPI_SETDESKWALLPAPER = 20  # action code passed to SystemParametersInfoW
    folder = zone + "/壁纸"
    interval = int(times) * 60
    while True:
        chosen = None
        # "with" releases the lock even if listing or applying fails, so the
        # console listener can never be blocked forever.
        with lock:
            if not os.path.exists(folder):
                os.mkdir(folder)
            files = os.listdir(folder)
            if files:
                restart = 1
                chosen = random.choice(files)
                filepath = folder + "/" + chosen
                ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, filepath, 0)
            else:
                restart = 0

        if chosen is None:
            # Nothing to show yet: look again in ten seconds.
            time.sleep(10)
            continue

        print("\n提示:壁纸更换成功! %s已应用 当前时间:%s\n" % (chosen, datetime.datetime.now().time()))
        time.sleep(4)
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError:
            # Best effort: the file may be in use or already removed.
            pass
        # The four seconds already slept count towards the interval.
        time.sleep(interval - 4)
def step1(url):  # collect the wallpaper-page links of one search-result page
    """Fetch one wallhaven search-result page and queue its wallpaper links.

    Side effects:
        * stores ``url`` in the global ``referer2`` (step2 sends it as referer);
        * in NSFW mode (``nsfw == 1``) logs in and stores the session in the
          global ``s`` so that step4 can hand it to step2;
        * appends every accepted wallpaper-page link to the global ``z1``.

    The account is read from the ``WALLHAVEN_USERNAME`` /
    ``WALLHAVEN_PASSWORD`` environment variables; when they are not set the
    placeholder values from the original script are used and must be edited.

    Args:
        url: Full search URL including the page number.

    Raises:
        SystemExit: when the page could not be fetched in two attempts.
    """
    global referer2, s
    referer2 = url

    # Block until the connectivity probe succeeds.  A non-zero status is
    # treated like an exception so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    # Two attempts in total: the second one makes the script more stable.
    rq = None
    for _attempt in range(2):
        try:
            if nsfw == 1:
                s = requests.session()
                # Every request gets a timeout so a stalled login cannot hang forever.
                login_page = s.get('https://wallhaven.cc/login', headers=h, timeout=5).content
                token_inputs = BeautifulSoup(login_page, 'lxml').select(
                    '#login > input[type=hidden]:nth-child(1)')
                data = {
                    'username': os.environ.get('WALLHAVEN_USERNAME', '******'),
                    'password': os.environ.get('WALLHAVEN_PASSWORD', '*******'),
                }
                for field in token_inputs:
                    # Hidden form token that the login form expects back.
                    data['_token'] = field['value']
                s.post('https://wallhaven.cc/auth/login', data, headers=h, timeout=5)
                rq = s.get(url, timeout=5)
            else:
                rq = requests.get(url, headers=h, timeout=5)
            break
        except requests.RequestException:
            rq = None
    if rq is None:
        print('与网站连接失败,请稍后重试')
        sys.exit()

    # Pick the wallpaper links out of the thumbnail grid.
    soup = BeautifulSoup(rq.content, 'lxml')
    if nsfw == 1:
        anchors = soup.select("div[id='thumbs']> section> ul > li > figure > a.preview[href]")
    else:
        anchors = soup.select("div[id='thumbs']> section> ul > li > figure > a[href]")
    for position, anchor in enumerate(anchors, start=1):
        nth = ':nth-child(' + str(position) + ')'
        if nsfw == 1:
            # In NSFW mode only the presence of the resolution label is required.
            if not soup.select("#thumbs > section > ul > li" + nth + " > figure > div > span.wall-res"):
                continue
        else:
            labels = soup.select("#thumbs > section > ul > li" + nth + " > figure > div > span")
            try:
                width, height = labels[0].text.split('x')[:2]
                ratio = int(width) / int(height)
            except (IndexError, ValueError, ZeroDivisionError):
                # Missing or malformed resolution label: skip this thumbnail
                # instead of aborting the whole run.
                continue
            # Aspect-ratio requirement: keep only ratios within 1.35 .. 2.1.
            if ratio < 1.35 or ratio > 2.1:
                continue
        z1.append(anchor["href"])
    print("\n此页预计获取%d张图片\n" % len(z1))
def step2(url, s=None):  # open a wallpaper page, find the image source, download it
    """Open one wallpaper page and download its full-size image via step3.

    Side effects: writes ``referer2`` into the shared header dict ``h``,
    stores ``url`` in the global ``referer`` (used by step3) and increments
    the global failure counter ``id2`` when the page cannot be used.

    Args:
        url: Address of a single wallpaper page (an entry of ``z1``).
        s: Logged-in ``requests`` session; used only when ``nsfw == 1``.
    """
    global id2, id, referer
    h['referer'] = referer2

    # Block until the connectivity probe succeeds; a non-zero status also
    # waits, so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    try:
        if nsfw == 1:
            rq = s.get(url, timeout=5)
        else:
            rq = requests.get(url, headers=h, timeout=5)
    except (requests.RequestException, AttributeError):
        # AttributeError covers NSFW mode being called without a session.
        id2 = id2 + 1
        return
    print("正在获取第%d张图片" % id)

    page = BeautifulSoup(rq.content, 'lxml')
    images = page.select("img[id='wallpaper']")
    referer = url
    if not images:
        # No image element on the page: count it as a failure so the
        # statistics printed by main stay truthful.
        id2 = id2 + 1
        return
    for image in images:
        source = image["src"]
        extension = source.split('.')[-1]
        file_name = zone + "/壁纸/" + str(id) + "." + extension
        step3(source, file_name)
def step3(url, file_name):  # download one image
    """Download the image at ``url`` and store it as ``file_name``.

    On success the global file counter ``id`` and the success counter
    ``id3`` are incremented; on a failed request (including an HTTP error
    status) the failure counter ``id2`` is incremented and nothing is written.

    Args:
        url: Direct address of the image file.
        file_name: Destination path of the downloaded file.
    """
    global id, id2, id3

    # Block until the connectivity probe succeeds; a non-zero status also
    # waits, so the loop never spins without a pause.
    while True:
        try:
            if internet() == 0:
                break
        except OSError:
            pass
        print("未联网,10秒后自动重试")
        time.sleep(10)

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36 Edg/86.0.622.48",
        'Referer': referer}
    try:
        response = requests.get(url, headers=headers, timeout=5)
        # Without this check an error page would be saved as a "wallpaper".
        response.raise_for_status()
    except requests.RequestException:
        id2 = id2 + 1
        return

    # The context manager closes the file even if the write fails.
    with open(file_name, 'wb') as target:
        target.write(response.content)
    id = id + 1
    id3 = id3 + 1
def step4(index, categories, purity, resolution, frequency, type):  # run the pipeline
    """Download every accepted wallpaper of one search-result page.

    Builds the search URL, lets step1 fill the global ``z1`` with wallpaper
    links and then calls step2 for each of them.

    Args:
        index: Number of the result page to fetch.
        categories: Value of the ``categories`` query parameter.
        purity: Value of the ``purity`` query parameter.
        resolution: Value of the ``atleast`` query parameter.
        frequency: Value of the ``topRange`` query parameter.
        type: Value of the ``sorting`` query parameter.
    """
    url = ("https://wallhaven.cc/search?categories=" + categories
           + "&purity=" + purity
           + "&atleast=" + resolution
           + "&topRange=" + frequency
           + "&sorting=" + type
           + "&order=desc&page=" + str(index))
    step1(url)
    if len(z1) == 0:
        print("\n\n\n\n抱歉,已经是最后一页了哦,重新选择吧!\n")
        # Start over with a fresh round of questions.
        main()
        # NOTE(review): main() loops forever, so this is normally unreachable;
        # returning keeps links queued by the nested run from being fetched twice.
        return
    for link in z1:
        if nsfw == 1:
            step2(link, s)
        else:
            step2(link)
def internet(host="www.baidu.com", port=443, timeout=3):
    """Probe connectivity with a single TCP connection attempt.

    Args:
        host: Host to connect to (default: the original probe target).
        port: TCP port to connect to.
        timeout: Socket timeout in seconds.

    Returns:
        0 when the connection succeeded, otherwise the error code reported
        by ``connect_ex``.

    Raises:
        OSError: for failures ``connect_ex`` does not turn into a code, for
            example ``socket.gaierror`` when the host cannot be resolved.
            Callers treat any exception as "offline".
    """
    # The context manager closes the probe socket; the original leaked one
    # socket per call.
    with socket.socket() as probe:
        probe.settimeout(timeout)
        return probe.connect_ex((host, port))
def main():
    """Interactive entry point: ask for the settings, then keep the folder stocked.

    Flow:
        1. create the wallpaper folder if needed and wait for the network;
        2. read interval, sorting, minimum resolution, category, NSFW flag
           and start page from the console;
        3. empty the folder, start the rotation thread (normal mode only)
           and download the first page through step4;
        4. loop forever: fetch the next page when at most two files are left
           (or, in NSFW mode, when the user asks for it) and restart the
           console listener whenever it has stopped.

    Raises:
        ValueError: when the interval is not an integer.
        SystemExit: when the interval is not positive.
    """
    global z1, id2, id3, start, start2, nsfw, on
    z1.clear()
    purity = '110'
    folder = zone + "/壁纸"
    path = folder + "/"
    if not os.path.exists(folder):
        os.mkdir(folder)
        print("已创建文件夹", path)
    # NOTE(review): the source lost its indentation; these two messages are
    # assumed to be printed on every start - confirm.
    print("壁纸都会下载在这里哦")
    print("壁纸用后即删,不必担心储存空间")

    # Wait for the network; a non-zero probe status backs off as well.
    while True:
        print("检测网络中...\n")
        try:
            if internet() == 0:
                print("网络已连接\n")
                break
        except OSError:
            pass
        print("未联网,30秒后自动重试")
        time.sleep(30)

    times = input("请输入壁纸切换频率: (分钟) (输入正整数哦 默认20分钟)\n")
    if times == '':
        times = '20'
    times = int(times)
    if times <= 0:
        sys.exit()
    times = str(times)

    sorting = input("类型选择:热门/排行榜/随机/风景/日更 \n 根据文字选择: hot/toplist/random/views/date_added (例如 选择热门则输入hot 默认hot)\n")
    if sorting == '':
        sorting = 'hot'
    if sorting == "toplist":
        frequency = input("时间选择:日排行/三日排行/七日排行/月排行/近三月排行/近六月排行/年排行 \n 根据文字选择: 1d/3d/1w/1M/3M/6M/1y (不要输错哦) \n")
    else:
        frequency = ""

    resolution = input("最低清晰度设置 格式'xxxx'x'xxxx' 不限请输入'0x0' 默认为 1920x1080 (注意按格式输入)\n")
    if resolution == "":
        resolution = '1920x1080'

    categories = input("是否动漫模式,默认普通模式(y/n)\n")
    if categories == 'y':
        categories = '010'
    else:
        categories = '110'

    nsf = input('WARNING: r18? (Y/N) 注意:此模式需手动切换壁纸! \n')
    if nsf == 'Y':
        on = 0
        nsfw = 1
        if categories == '110':
            categories = '111'
        purity = '001'
    else:
        # main can be re-entered from step4; do not inherit an earlier 'Y'.
        nsfw = 0

    page = input("想要从第几页开始获取?(默认从第1页获取) \n")
    try:
        page = int(page)
    except ValueError:
        page = 1
    print("将从第%i页开始获取\n" % page)
    print("稍等,正在下载图片ing...")
    print("\n国外网站,下载可能较慢")

    for name in os.listdir(folder):  # empty the folder
        try:
            os.remove(path + name)
        except OSError:
            continue

    # The rotation thread is started once, and only in normal mode.
    if nsfw == 0 and start2 == 1:
        t2 = threading.Thread(target=thread2, args=(times,))
        t2.start()
        start2 = 0

    starttime = datetime.datetime.now()
    step4(page, categories, purity, resolution, frequency, sorting)
    print('%d张壁纸获取成功,%d张壁纸获取失败' % (id3 - 1, id2))
    id2 = 0
    id3 = 1
    endtime = datetime.datetime.now()
    print("初始化完成,用时%i秒" % (endtime - starttime).seconds)

    while True:  # keep the folder stocked
        if nsfw == 1:
            on = input('是否继续获取下一页(y/n)')
            if on == 'y':
                on = 1
        length = len(os.listdir(folder))
        if nsfw == 1 and on == 1:
            # The user asked for the next page: force a refill.
            length = 0
            on = 0  # was the no-op comparison "on ==0"
        if length <= 2:
            print("\n获取第%i页" % (page + 1))
            while True:
                print("检测网络中...")
                try:
                    if internet() == 0:
                        print("网络已连接")
                        break
                except OSError:
                    pass
                print("未联网,30秒后自动重试")
                time.sleep(30)
            page += 1
            z1.clear()
            starttime = datetime.datetime.now()
            step4(page, categories, purity, resolution, frequency, sorting)
            endtime = datetime.datetime.now()
            print('%d张壁纸获取成功,%d张壁纸获取失败 用时%i秒' % (id3 - 1, id2, (endtime - starttime).seconds))
            id2 = 0
            id3 = 1
        if nsfw == 0 and start == 1:
            # (Re)start the console listener; it sets start back to 1 when it stops.
            thread1 = threading.Thread(target=thread)
            thread1.start()
            start = 0
        if restart != 0:
            time.sleep(int(times) * 60)
        else:
            # The rotation thread reported an empty folder: poll again soon,
            # but without spinning at full speed.
            time.sleep(1)
# Script entry: show the banner, choose the drive for the wallpaper folder
# and run the interactive main loop.
print("\n*****************************************欢迎使用!************************************************")
print("\n 自动获取更换桌面壁纸 by LYX")
print("\n注意:请在网络连接条件下使用\n")
print("建议在WiFi下食用\n")
print("源网站:www.wallhaven.cc\n")
print("当前壁纸可在 %s 路径处获取 \n"%Get_Background_Path())
# Flag read by main: 1 means the console listener thread has to be started.
start = 1
lll = input('有无d盘(y/n)')
# Drive prefix of the wallpaper folder; read by every function as "zone".
if lll == 'y':
    zone = 'D:'
else:
    zone = 'C:'
try:
    main()
except (SystemExit, KeyboardInterrupt):
    # Deliberate exits (sys.exit() inside the script, Ctrl+C) are not
    # parameter errors, so the "wrong parameter" hint is not shown for them.
    pass
except Exception:
    print("出错了哦(是不是参数输错了呢..)")
# Keep the console window open so the last messages stay readable.
input("按任意键以退出")
总结
代码在350行左右,实现了许多功能,如自动创建壁纸文件夹,提供了许多壁纸选项,实现个性化壁纸定制,内置用户名密码,可以下载R18内容,壁纸在使用过后会自动删除,开机自动复原初始壁纸。无论作为壁纸下载器,或是自动切换壁纸软件都可以在此基础上修改实现。对于内存占用,大约在15M左右,几乎不消耗cpu,故认为对于wallhaven壁纸爬取及自动切换问题已经是一个较为成熟的解决方案。
当然美中不足便是,小黑框需要一直在后台,因为没有去写关于图形化界面的代码。。对于这个问题,我做了另一个程序,后台运行,20分钟换一次壁纸,但是不可以改参数,换句话说,一开始参数就是固定的,效果也不错,现在还在用。(可以根据上面的代码改一下)
稳定性方面:我的笔记本电脑平均在5到6天一关机,期间程序几乎没有挂掉过,稳定性还是可以的。(win10下测试)
其实对于Python语言,我也是自学的,所以可能有很多问题不太清楚,学无止境。
在使用的时候遇到的问题,可以通过邮箱联系我。
原创,转载请注明出处!
使用说明
注意:对于nsfw,以上代码需要输入自己在wallhaven的用户名和密码
可选参数版:按照提示选择适当参数后,最小化命令行窗口即可;在窗口中输入'change'可立即切换壁纸。
日更版:参数不可修改,程序自动在后台运行,首次下载大约需要三分钟,下载完成后自动切换壁纸,每20分钟更换一次,每日自动更新。内置参数设定为:
categories=110&purity=100&atleast=1920x1080&topRange=1d&sorting=toplist
下载的图片默认保存在C盘的“壁纸”文件夹,也可选择保存在D盘。
下载地址
百度云链接:https://pan.baidu.com/s/1r0fEwHbT0Sn3EcZGqe7pYA
提取码:8flb
自动切换壁纸可选参数版 快速获取(无毒放心使用)