吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 1233|回复: 8
收起左侧

[其他原创] C# 多线程缓存技术小说极速爬取

  [复制链接]
TZ糖纸 发表于 2024-8-14 10:08
[C#] 纯文本查看 复制代码
/// <summary>
/// Console entry point: downloads one novel and writes it to a text file.
/// </summary>
internal class Program
{
    /// <summary>
    /// Loads the chapter list and chapter bodies for the hard-coded book URL,
    /// then writes every chapter (title line followed by its content) to
    /// "&lt;book title&gt;.txt" in the current working directory.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        Book book = new Book();
        book.Url = "http://www.tycqzw.net/113_113116/";
        await book.SetItems();

        await book.SetItemsContent();

        // NOTE(review): this is a plain string ordering of Path, so paths whose
        // numeric part differs in digit count may not sort in reading order —
        // confirm against the site's URL scheme.
        var items = book.Items.OrderBy(x => x.Path).ToList();

        StringBuilder stringBuilder = new StringBuilder();

        foreach (var item in items)
        {
            // Chain Append calls instead of building a temporary string per chapter.
            stringBuilder.Append(item.Title).Append('\n');
            stringBuilder.Append(item.Content);
        }

        File.WriteAllText(ToSafeFileName(book.Title) + ".txt", stringBuilder.ToString());

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>
    /// Turns a scraped book title into a usable file name by replacing every
    /// character the file system rejects with an underscore.
    /// </summary>
    /// <param name="title">Scraped title; may be null or blank.</param>
    /// <returns>A non-empty file name without extension.</returns>
    private static string ToSafeFileName(string title)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return "book";
        }

        // The title comes from remote HTML, so it can contain characters such
        // as '/', ':' or '?' that would make File.WriteAllText throw.
        return string.Join("_", title.Split(Path.GetInvalidFileNameChars()));
    }
}




[C#] 纯文本查看 复制代码
/// <summary>
/// A book scraped from a chapter-index page: its title, the index URL and the
/// list of chapters. The chapter list is cached in Redis as JSON under the
/// book title.
/// </summary>
public class Book
{
    // Upper bound on the number of chapter downloads in flight at once.
    private const int MaxConcurrentDownloads = 1000;

    // Progress is written to the cache after every this-many finished chapters.
    private const int CacheFlushInterval = 10;

    public Book()
    {
        Items = new List<BookItem>();
    }

    public string Title { get; set; }
    public string Url { get; set; }
    public List<BookItem> Items { get; set; }

    /// <summary>
    /// Adds a chapter unless a chapter with the same <c>Path</c> is already present.
    /// </summary>
    /// <param name="item">Chapter to add.</param>
    public void Add(BookItem item)
    {
        if (Items.Any(n => n.Path == item.Path))
        {
            return;
        }

        Items.Add(item);
    }

    /// <summary>
    /// Populates <see cref="Items"/> (and <see cref="Title"/>) either from the
    /// Redis cache or by downloading and parsing the index page at <see cref="Url"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The index page does not contain a "#info h1" title element.
    /// </exception>
    internal async Task SetItems()
    {
        var redis = Redis.GetInstance();

        // Fast path: the caller already knows the title, so the cache key is available.
        if (TryLoadItemsFromCache(redis))
        {
            return;
        }

        var client = new RestClient(this.Url);
        var request = new RestRequest();
        var response = await client.ExecuteAsync(request);
        var parser = new HtmlParser();
        var doc = parser.ParseDocument(response.Content ?? string.Empty);

        var heading = doc.QuerySelector("#info h1");
        if (heading == null)
        {
            throw new InvalidOperationException(
                "Could not find the book title (#info h1) on " + this.Url);
        }

        this.Title = heading.InnerHtml;
        Console.WriteLine(this.Title);

        // The cache is keyed by title, which is only known now. Check again so
        // chapters downloaded by an earlier run are reused instead of being
        // overwritten below with content-less entries.
        if (TryLoadItemsFromCache(redis))
        {
            return;
        }

        // Skip links whose path is already present (same rule as Add), using a
        // set so the scan stays linear in the number of links.
        var seenPaths = new HashSet<string>(Items.Select(existing => existing.Path));
        foreach (var anchor in doc.QuerySelectorAll("dd a").OfType<IHtmlAnchorElement>())
        {
            if (seenPaths.Add(anchor.PathName))
            {
                Items.Add(new BookItem() { Title = anchor.InnerHtml, Path = anchor.PathName });
            }
        }

        redis.Set(this.Title, JsonConvert.SerializeObject(this.Items));
    }

    /// <summary>
    /// Downloads the content of every chapter whose <c>Content</c> is still null,
    /// periodically saving progress to the cache, and saves once more at the end.
    /// </summary>
    internal async Task SetItemsContent()
    {
        var completed = 0;
        var redis = Redis.GetInstance();

        // The semaphore caps the number of simultaneous downloads.
        using (var semaphore = new SemaphoreSlim(MaxConcurrentDownloads, MaxConcurrentDownloads))
        {
            async Task ProcessTask(BookItem bookItem)
            {
                await semaphore.WaitAsync();
                try
                {
                    string mess = bookItem.Title;
                    await bookItem.SetContent();
                    mess += "-获取完成";

                    // Continuations may run on different threads at the same
                    // time, so the shared counter is incremented atomically.
                    if (Interlocked.Increment(ref completed) % CacheFlushInterval == 0)
                    {
                        redis.Set(this.Title, JsonConvert.SerializeObject(this.Items));
                        mess += "-写入缓存";
                    }

                    Console.WriteLine(mess);
                }
                finally
                {
                    // Free the slot so the next waiting download can start.
                    semaphore.Release();
                }
            }

            var tasks = new List<Task>();
            foreach (var item in this.Items)
            {
                // Chapters that already have content (e.g. from the cache) are skipped.
                if (item.Content == null)
                {
                    tasks.Add(ProcessTask(item));
                }
            }

            // Wait for every download to finish before disposing the semaphore.
            await Task.WhenAll(tasks);
        }

        redis.Set(this.Title, JsonConvert.SerializeObject(this.Items));
    }

    /// <summary>
    /// Replaces <see cref="Items"/> with the list cached under <see cref="Title"/>,
    /// if the title is known and a non-empty list is stored for it.
    /// </summary>
    /// <param name="redis">Client used to read the cache.</param>
    /// <returns>True when <see cref="Items"/> was loaded from the cache.</returns>
    private bool TryLoadItemsFromCache(RedisClient redis)
    {
        if (string.IsNullOrWhiteSpace(this.Title))
        {
            return false;
        }

        var json = redis.Get(this.Title);
        if (string.IsNullOrWhiteSpace(json))
        {
            return false;
        }

        // Deserialization yields null for a JSON "null" payload; treat that and
        // an empty list as a cache miss so Items never becomes null.
        var cached = JsonConvert.DeserializeObject<List<BookItem>>(json);
        if (cached == null || cached.Count == 0)
        {
            return false;
        }

        this.Items = cached;
        return true;
    }
}
/// <summary>
/// One chapter of a book: its title, its site-relative path and, once
/// downloaded, its text content.
/// </summary>
public class BookItem
{
    private const string SiteBaseUrl = "http://www.tycqzw.net/";

    // One client shared by all chapters: creating a client per request while
    // many downloads are in flight wastes connections.
    private static readonly RestClient SharedClient = new RestClient(SiteBaseUrl);

    public string Title { get; set; }
    public string Path { get; set; }
    public string Content { get; set; }

    /// <summary>
    /// Downloads the chapter page and stores the text of its "#content" div in
    /// <see cref="Content"/>. This is best-effort: on any failure the problem is
    /// reported on standard error and <see cref="Content"/> is left unchanged.
    /// </summary>
    internal async Task SetContent()
    {
        if (string.IsNullOrWhiteSpace(Path))
        {
            Console.Error.WriteLine($"Skipping chapter '{Title}': no path.");
            return;
        }

        try
        {
            // Strip leading slashes: the base URL already ends with one, so
            // keeping them would produce "…net//…".
            var request = new RestRequest(Path.TrimStart('/'));
            var response = await SharedClient.ExecuteAsync(request);

            var parser = new HtmlParser();
            var doc = parser.ParseDocument(response.Content ?? string.Empty);

            if (doc.QuerySelector("#content") is IHtmlDivElement contentMain)
            {
                this.Content = contentMain.TextContent;
            }
            else
            {
                Console.Error.WriteLine($"Chapter '{Title}' ({Path}): no #content div in the response.");
            }
        }
        catch (Exception ex)
        {
            // Deliberately not rethrown: one failed chapter must not abort the
            // whole batch. Report it instead of hiding it.
            Console.Error.WriteLine($"Failed to fetch chapter '{Title}' ({Path}): {ex.Message}");
        }
    }
}



[C#] 纯文本查看 复制代码
/// <summary>
/// Holds the single <c>RedisClient</c> used by the application and creates it
/// on first request.
/// </summary>
public class Redis
{
    // Created on the first GetInstance call; null until then.
    private static RedisClient sharedClient;

    // Not instantiable: the class only exposes the static accessor.
    private Redis() { }

    /// <summary>
    /// Returns the shared client, constructing it from the fixed connection
    /// string the first time it is requested.
    /// </summary>
    /// <returns>The shared <c>RedisClient</c> instance.</returns>
    public static RedisClient GetInstance()
    {
        return sharedClient
            ?? (sharedClient = new RedisClient("127.0.0.1:6379,defaultDatabase=12"));
    }
}

e4d6d2de0be7974b7249f2fcdc10db9.png

免费评分

参与人数 6吾爱币 +13 热心值 +6 收起 理由
yekai2024 + 1 + 1 我很赞同!
苏紫方璇 + 7 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
1783780690 + 1 + 1 热心回复!
a5436539 + 1 + 1 我很赞同!
williamlyf + 1 + 1 学习,加个注释就更完美了...
wkfy + 2 + 1 我很赞同!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

yhzh 发表于 2024-8-14 12:41
感谢分享。。
小小小酥 发表于 2024-8-14 13:38
SONGXINGJING520 发表于 2024-8-15 10:32
caisonglinlove 发表于 2024-8-16 03:50
感谢分享,大神牛x,必须点赞
msmvc 发表于 2024-8-17 19:01
看论坛多数是用python的,我用C Sharp
findevery 发表于 2024-8-23 13:34
学习了!!!
81169009 发表于 2024-9-5 18:40
不会用,大神谁帮忙打包下成品啊
xyj152 发表于 2024-9-6 08:15
好东西,抽空要学习一下
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-12-15 17:30

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表