【.net】获取网页CDM的下载链接的地址
时间：2023-09-14 09:06:38
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Framework.Core.Crawl;
using HtmlAgilityPack;

namespace WebCaptureSolution
{
    /// <summary>
    /// Logs in to a forum through an embedded browser form, walks the thread
    /// list of one archive page and writes each thread's title together with
    /// its download links to a text file.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// urlmon.dll entry point used here to set the User-Agent string for
        /// the current process' browser session.
        /// </summary>
        [DllImport("urlmon.dll", CharSet = CharSet.Ansi)]
        private static extern int UrlMkSetSessionOption(int dwOption, string pBuffer, int dwBufferLength, int dwReserved);

        const int URLMON_OPTION_USERAGENT = 0x10000001;
        const string SPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

        // Destination of the collected links.
        const string OutputPath = @"D:\Nodejs\myjs\DownLoadUrl.txt";

        // Upper bound for a single page load so a dead page cannot hang the run forever.
        const int NavigationTimeoutSeconds = 120;

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <exception cref="InvalidOperationException">
        /// The login page or the thread-list page could not be loaded, or the
        /// login page does not contain the expected "content" element.
        /// </exception>
        [STAThread]
        static void Main(string[] args)
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            string log_url = "http://www.handsupowo.pl/member.php?action=login";
            string url1 = "http://www.handsupowo.pl/archive/index.php?forum-13.html";
            List<string> threadUrls = new List<string>();
            List<string> results = new List<string>();

            var form = new WebCapture();

            #region Login
            // The user agent must be set before the first navigation.
            UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, SPUserAgent, SPUserAgent.Length, 0);

            if (!NavigateAndWait(form, log_url))
            {
                throw new InvalidOperationException("Timed out while loading the login page: " + log_url);
            }

            // Step 1: fill in the credentials and submit the login form.
            SubmitLogin(form);

            // Give the post-login redirect time to finish before moving on.
            PumpMessages(10);
            #endregion

            // Step 2: read the thread list.
            if (!NavigateAndWait(form, url1))
            {
                throw new InvalidOperationException("Timed out while loading the thread list: " + url1);
            }

            HtmlAgilityPack.HtmlDocument listDoc = ParseCurrentPage(form);
            if (listDoc == null)
            {
                throw new InvalidOperationException("The thread list page has no <html> element: " + url1);
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var threadAnchors = listDoc.DocumentNode.SelectNodes("//div[@class='threadlist']//a");
            if (threadAnchors != null)
            {
                foreach (var anchor in threadAnchors)
                {
                    string href = GetHref(anchor);
                    if (!string.IsNullOrEmpty(href))
                    {
                        threadUrls.Add(href);
                    }
                }
            }

            // Step 3: visit every thread and collect its download links.
            foreach (var threadUrl in threadUrls)
            {
                if (!NavigateAndWait(form, threadUrl))
                {
                    // A single unreachable thread should not abort the whole run.
                    continue;
                }

                // Extra settle time after the document-complete signal.
                PumpMessages(5);

                HtmlAgilityPack.HtmlDocument page = ParseCurrentPage(form);
                if (page == null)
                {
                    continue;
                }

                var downloadLinks = page.DocumentNode.SelectNodes("//a[@rel='nofollow']");
                if (downloadLinks == null)
                {
                    downloadLinks = page.DocumentNode.SelectNodes("//a[@target='_blank']");
                }

                var titleNode = page.DocumentNode.SelectSingleNode("//div[@id='fullversion']//a");

                // Title first (falling back to the thread URL when the title
                // anchor is missing), then every link. The previous loop
                // dropped the first link whenever a page had more than one.
                List<string> entry = new List<string>();
                entry.Add(titleNode != null ? titleNode.InnerText : threadUrl);

                if (downloadLinks != null)
                {
                    foreach (var link in downloadLinks)
                    {
                        string href = GetHref(link);
                        if (!string.IsNullOrEmpty(href))
                        {
                            entry.Add(href);
                        }
                    }
                }

                results.Add(string.Join(Environment.NewLine, entry.ToArray()));
                results.Add(Environment.NewLine);
            }

            // WriteAllLines fails if the target directory is missing.
            string outputDirectory = System.IO.Path.GetDirectoryName(OutputPath);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                System.IO.Directory.CreateDirectory(outputDirectory);
            }

            System.IO.File.WriteAllLines(OutputPath, results.ToArray(), Encoding.UTF8);
        }

        /// <summary>
        /// Navigates the form to <paramref name="url"/> and pumps the message
        /// loop until the form reports DocumentOK or the timeout elapses.
        /// </summary>
        /// <returns>true when the document was reported ready; false on timeout.</returns>
        private static bool NavigateAndWait(WebCapture form, string url)
        {
            form.DocumentOK = false;
            form.Navigate(url);

            DateTime deadline = DateTime.UtcNow.AddSeconds(NavigationTimeoutSeconds);
            while (!form.DocumentOK)
            {
                if (DateTime.UtcNow > deadline)
                {
                    return false;
                }

                Application.DoEvents();
                // Yield briefly so the wait does not spin a CPU core.
                System.Threading.Thread.Sleep(10);
            }

            return true;
        }

        /// <summary>
        /// Keeps the UI message loop running for the given number of seconds.
        /// </summary>
        private static void PumpMessages(double seconds)
        {
            DateTime end = DateTime.UtcNow.AddSeconds(seconds);
            while (DateTime.UtcNow <= end)
            {
                Application.DoEvents();
                System.Threading.Thread.Sleep(10);
            }
        }

        /// <summary>
        /// Fills the username/password inputs inside the "content" element and
        /// clicks the first input whose markup contains "submit".
        /// </summary>
        private static void SubmitLogin(WebCapture form)
        {
            var browserDocument = form.WebBrowser.Document;
            var container = browserDocument == null ? null : browserDocument.GetElementById("content");
            if (container == null)
            {
                throw new InvalidOperationException("The login page has no element with id 'content'.");
            }

            var inputs = container.GetElementsByTagName("input");
            for (int i = 0; i < inputs.Count; i++)
            {
                var field = inputs[i];
                string markup = field.OuterHtml ?? string.Empty;

                if (markup.Contains("username"))
                {
                    // Placeholder credential: replace with a real account name.
                    field.SetAttribute("value", "id");
                }
                else if (markup.Contains("pass"))
                {
                    // Placeholder credential: replace with the real password.
                    field.SetAttribute("value", "password");
                }
                else if (markup.Contains("submit"))
                {
                    field.InvokeMember("Click");
                    break;
                }
            }
        }

        /// <summary>
        /// Parses the outer HTML of the browser's current html element with HtmlAgilityPack.
        /// </summary>
        /// <returns>The parsed document, or null when the browser has no html element.</returns>
        private static HtmlAgilityPack.HtmlDocument ParseCurrentPage(WebCapture form)
        {
            var browserDocument = form.WebBrowser.Document;
            if (browserDocument == null)
            {
                return null;
            }

            var roots = browserDocument.GetElementsByTagName("html");
            if (roots.Count == 0)
            {
                return null;
            }

            HtmlAgilityPack.HtmlDocument parsed = new HtmlAgilityPack.HtmlDocument();
            parsed.LoadHtml(roots[0].OuterHtml ?? string.Empty);
            return parsed;
        }

        /// <summary>
        /// Returns the href attribute value of <paramref name="anchor"/>, or
        /// null when the attribute is absent.
        /// </summary>
        private static string GetHref(HtmlAgilityPack.HtmlNode anchor)
        {
            var attribute = anchor.Attributes["href"];
            return attribute == null ? null : attribute.Value;
        }
    }
}
相关文章
- .Net 下未捕获异常的处理
- 【ASP.NET】验证控件
- 网页优化系列二:使用Cache缓存静态文件、图片(asp.net版)
- ASP.NET的Web网页如何进行分页操作(Demo举例)
- ASP.NET的Web网页如何进行分页操作(Demo举例)
- ASP.NET关于书籍详情和删除的Demo(HttpHandler进行页面静态化[自动生成html网页]+Entity Framework通过类创建数据库+EF删查)
- ASP.NET中AJAX的异步加载(Demo演示)
- asp.net C#检查URL是否有效
- 在ASP.NET中发送电子邮件的实例教程
- ASP.NET Core Authentication in a Load Balanced Environment with HAProxy and Redis
- 重新整理 .net core 实践篇————防跨站脚本攻击[四十]
- 详解ASP.NET Core WebApi 返回统一格式参数
- .NET平台开源项目速览(4).NET文档生成工具ADB及使用
- 这些.NET开源项目你知道吗?让.NET开源来得更加猛烈些吧!(第二辑)
- CV之ModelScope:基于ModelScope框架的人脸人像数据集利用DCT-Net算法实现人像卡通化图文教程之详细攻略
- 深入探究ASP.NET Core读取Request.Body的正确方式
- TVideoGrabber SDK 15.2.4.6 for .NET/ACTIVEX/OCX
- docker: Error response from daemon: Get "https://registry-1.docker.io/v2/": net/http: TLS handshake