import concurrent.futures
import json
import os
import threading
from urllib.parse import urlparse

import requests
# --- Configuration ---
headers = {
    "cookie": "",  # PHPSESSID (or the full cookie key/value string) of a logged-in account
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)",
}
user_id = ""  # the numeric user id that precedes PHPSESSID in the cookie
proxies = None  # e.g. {"https": None}

# --- Persistent state ---
bookmarks = {}  # bookmarked works, keyed by illustration id
history = {}  # pages already downloaded, keyed by "<pid>_<page_index>"

# Ensure both state files exist, then load them.  Context managers close the
# handles promptly (the original `open(...).read()` calls leaked them).
for _state_file in ("bookmarks.json", "history.json"):
    if not os.path.exists(_state_file):
        with open(_state_file, "w") as f:
            f.write("{}")
with open("bookmarks.json") as f:
    bookmarks = json.load(f)
with open("history.json") as f:
    history = json.load(f)
# Check login status.
def check_login_status():
    """Return True if the configured cookie authenticates against Pixiv.

    Queries the my_profile endpoint and inspects the "error" flag in the
    JSON response.  Any network failure or unexpected payload is treated
    as "not logged in" rather than crashing (the original bare `except:`
    is narrowed to the exceptions these calls can actually raise).
    """
    try:
        response = requests.get(
            "https://www.pixiv.net/ajax/my_profile", headers=headers, proxies=proxies
        )
        return not response.json()["error"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network/HTTP failure; ValueError: body is not
        # JSON; KeyError/TypeError: JSON lacks the expected "error" field.
        return False
# Update the bookmark list from the Pixiv API.
def update_bookmarks():
    """Page through the user's public bookmarks and merge new works into the
    module-level ``bookmarks`` dict, then persist it to bookmarks.json.

    Paging stops when either the server-reported total is reached or 10
    consecutive already-known works are seen (presumably the API returns
    newest-first, so a run of repeats means the remainder is already
    cached -- TODO confirm against the endpoint's ordering).

    Returns:
        dict: the updated ``bookmarks`` mapping (id -> work metadata).
    """
    offset = 0
    limit = 100  # page size requested from the bookmarks endpoint
    total = None  # total bookmark count, taken from the first response
    incremental = []  # ids newly added during this run
    repeat_count = 0  # consecutive already-known works seen
    while True:
        print(f"Loading page {int((offset + limit) / limit)}")
        url = f"https://www.pixiv.net/ajax/user/{user_id}/illusts/bookmarks?tag=&offset={offset}&limit={limit}&rest=show&lang=zh"
        data = requests.get(url, headers=headers, proxies=proxies).json()
        if total is None:
            total = data["body"]["total"]
        for bookmark in data["body"]["works"]:
            if bookmark["id"] not in bookmarks:
                bookmarks[bookmark["id"]] = bookmark
                incremental.append(bookmark["id"])
                repeat_count = 0  # reset the counter on any new work
                print(f"New illustration: {bookmark['id']}")
            else:
                repeat_count += 1  # one more consecutive repeat
                if repeat_count >= 10:  # 10 consecutive repeats: stop scanning this page
                    print("跳出循环")  # runtime message ("breaking out of the loop")
                    break
        offset += limit
        if incremental:
            print("...ok")
        else:
            print("No new illustrations")
        # Leave the outer loop at the end of the data or after 10 repeats
        # (repeat_count persists across pages until a new work resets it).
        if offset >= total or repeat_count >= 10:
            break
    total_bookmarks = len(bookmarks)
    print(f"Total bookmarks: {total_bookmarks}")
    with open("bookmarks.json", "w") as f:
        json.dump(bookmarks, f, indent=4)
    return bookmarks
# Fetch per-page image metadata for every bookmarked illustration.
def fetch_illustration_data():
    """Populate ``bookmarks[pid]["picture"]`` with the /pages API response
    for every bookmark that does not already have one, using a thread pool.

    Saves bookmarks.json periodically (on every 50th completed future when
    that future actually fetched something) and once more at the end.
    """
    def get_picture(pid):
        # Fetch the page list only if not already cached on the entry;
        # return the pid when a request was made, None when skipped.
        if "picture" not in bookmarks[pid]:
            url = f"https://www.pixiv.net/ajax/illust/{pid}/pages"
            bookmarks[pid]["picture"] = requests.get(
                url, headers=headers, proxies=proxies
            ).json()
            return pid
        return None
    with concurrent.futures.ThreadPoolExecutor(max_workers=25) as executor:
        futures = {executor.submit(get_picture, pid): pid for pid in bookmarks}
        for i, future in enumerate(concurrent.futures.as_completed(futures)):
            pid = futures[future]
            try:
                result = future.result()
                # Periodic checkpoint.  NOTE(review): worker threads may
                # still be inserting "picture" keys while json.dump iterates
                # the nested dicts -- a "dict changed size during iteration"
                # race looks possible; confirm before relying on the
                # intermediate checkpoint files.
                if result and i % 50 == 0:
                    with open("bookmarks.json", "w") as f:
                        json.dump(bookmarks, f, indent=4)
            except Exception as e:
                print(f"Error fetching data for {pid}: {e}")
    # Final save after the pool has been joined (all workers finished).
    with open("bookmarks.json", "w") as f:
        json.dump(bookmarks, f, indent=4)
# Download a single image.
def download_image(url, path):
    """Fetch *url* with Pixiv referer headers and write the bytes to *path*.

    Returns True only when the HTTP status is 200, the body is non-empty,
    and the written file exists with a non-zero size; False otherwise.
    """
    request_headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)",
        "origin": "https://www.pixiv.net/",
        "referer": "https://www.pixiv.net/",
    }
    try:
        resp = requests.get(url, headers=request_headers, proxies=proxies)
        if resp.status_code != 200 or not resp.content:
            return False
        with open(path, "wb") as fh:
            fh.write(resp.content)
        # Verify the file actually landed on disk before claiming success.
        return os.path.exists(path) and os.path.getsize(path) > 0
    except Exception as e:
        print(f"Download failed: {e}")
        return False
# Extract the file name from a URL.
def extract_filename(url):
    """Return the final path component of *url*, or None on a parse error.

    The original bare ``except:`` is narrowed to the exceptions
    ``urlparse``/attribute access can actually raise, so unrelated bugs
    (e.g. KeyboardInterrupt) are no longer swallowed.
    """
    try:
        return urlparse(url).path.split("/")[-1]
    except (ValueError, TypeError, AttributeError):
        print(f"Error parsing URL: {url}")
        return None
# Download the bookmarked illustrations.
def download_illustrations():
    """Download every page of every bookmarked illustration into ./pixiv/.

    Skips pages already recorded in ``history`` and records each success in
    history.json.  Uses a 10-worker thread pool; history mutation and the
    history.json write are serialized with a lock because multiple workers
    finish concurrently.
    """
    os.makedirs("pixiv", exist_ok=True)  # ensure the target directory exists
    history_lock = threading.Lock()

    def download_page(pid, page_index, page):
        # Download one page of one illustration; one call per submitted task.
        key = f"{pid}_{page_index}"
        if key in history:
            return  # already downloaded
        print(f"Downloading {key}.jpg", end=" ")
        original_url = page["urls"]["original"]
        filename = extract_filename(original_url)
        # BUG FIX: the save path was the literal string "pixiv/(unknown)",
        # so every page overwrote the same file; save under the filename
        # extracted from the original URL instead.
        if filename and download_image(original_url, f"pixiv/{filename}"):
            with history_lock:
                history[key] = True
                with open("history.json", "w") as f:
                    json.dump(history, f, indent=4)
            print("...ok")
        else:
            print("...fail")

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        for pid in bookmarks:
            if "picture" in bookmarks[pid] and "body" in bookmarks[pid]["picture"]:
                pages = bookmarks[pid]["picture"]["body"]
                for page_index, page in enumerate(pages):
                    futures.append(
                        executor.submit(download_page, pid, page_index, page)
                    )
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Download error: {e}")
# Entry point: verify the session cookie, then sync bookmarks,
# fetch per-page metadata, and download the images.
if __name__ == "__main__":
    if check_login_status():
        print("Login successful")
        update_bookmarks()
        fetch_illustration_data()
        download_illustrations()
    else:
        print("Login failed")