Arzon.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using AVSORTER;
  6. using System.Text.RegularExpressions;
  7. using HtmlAgilityPack;
  8. using System.IO;
  9. using System.Runtime.Serialization.Formatters.Binary;
  10. namespace Gets
  11. {
  12. public class Arzon : AVSORTER.IGetable, ICloneable
  13. {
  14. Uri uri;
  15. MyWebClient wc;
  /// <summary>
  /// Creates a scraper instance backed by a fresh <c>MyWebClient</c>.
  /// </summary>
  /// <param name="IsInit">
  /// Whether to initialize the cookies right away. Cookie initialization takes
  /// time and requires network access; pass <c>false</c> and call
  /// <see cref="CookiesInit"/> manually when that is preferable.
  /// </param>
  public Arzon(bool IsInit)
  {
      this.wc = new MyWebClient();
      if (!IsInit)
      {
          return;
      }
      this.CookiesInit();
  }
  /// <summary>
  /// Starts an asynchronous request to the arzon.jp age-check URL using the
  /// instance's web client. Completion is reported through
  /// <c>wc_DownloadDataCompleted</c>, which is registered here and recognizes
  /// this request by the <c>"init"</c> user-state token.
  /// </summary>
  /// <remarks>
  /// Presumably the response stores the age-verification cookie in the client's
  /// cookie container - confirm against MyWebClient.
  /// </remarks>
  public void CookiesInit()
  {
      const string url = @"http://www.arzon.jp/index.php?action=adult_customer_agecheck&agecheck=1";//&redirect=http%3A%2F%2Fwww.arzon.jp%2F";
      uri = new Uri(url);

      // Detach before attaching: removing a handler that is not subscribed is a
      // no-op, and this keeps repeated CookiesInit() calls from stacking duplicate
      // subscriptions (which would raise InitCompleted once per earlier call).
      wc.DownloadDataCompleted -= wc_DownloadDataCompleted;
      wc.DownloadDataCompleted += wc_DownloadDataCompleted;

      wc.DownloadDataAsync(uri, "init");
  }
  /// <summary>
  /// Raised after the download started by <c>CookiesInit</c> has completed.
  /// </summary>
  public event EventHandler InitCompleted;

  /// <summary>
  /// Completion callback for the web client's asynchronous data downloads.
  /// Only completions tagged with the <c>"init"</c> user state are handled:
  /// they set <see cref="IsInitCompleted"/> and raise <see cref="InitCompleted"/>.
  /// </summary>
  /// <param name="sender">The object that raised the event.</param>
  /// <param name="e">Completion data; only <c>UserState</c> is inspected.</param>
  void wc_DownloadDataCompleted(object sender, System.Net.DownloadDataCompletedEventArgs e)
  {
      if ((e.UserState as string) != "init")
      {
          return;
      }

      // NOTE(review): e.Error and e.Cancelled are not inspected, so initialization
      // is reported as completed even when the request failed - confirm intent.
      this.IsInitCompleted = true;

      // Copy the delegate first so a subscriber removing itself on another thread
      // between the null check and the call cannot cause a NullReferenceException.
      EventHandler handler = this.InitCompleted;
      if (handler != null)
      {
          handler(this, EventArgs.Empty);
      }
  }

  /// <summary>
  /// True once the request started by <c>CookiesInit</c> has completed.
  /// </summary>
  public bool IsInitCompleted { get; set; }
  /// <summary>
  /// Runs a search on arzon.jp for <paramref name="fcode"/> and converts every
  /// entry of the result list page into a <c>MovieBasic</c>.
  /// </summary>
  /// <param name="fcode">Search term placed in the <c>q</c> parameter of the list URL.</param>
  /// <returns>
  /// One entry per parsed result. The list is empty when the page has no
  /// <c>autopagerize_page_element</c> container or no <c>itemd</c> blocks.
  /// Entries without a title link are skipped; a missing thumbnail, maker or
  /// label cell yields an empty string for that field.
  /// </returns>
  public List<MovieBasic> Query(string fcode)
  {
      Uri u = urlCombine(fcode);
      HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(wc.GetHTML(u));

      List<MovieBasic> results = new List<MovieBasic>();

      // No result container means the search found nothing.
      if (doc.DocumentNode.SelectSingleNode("//div[@class='autopagerize_page_element']") == null)
      {
          return results;
      }

      // SelectNodes returns null (not an empty collection) when nothing matches.
      HtmlNodeCollection itemNodes = doc.DocumentNode.SelectNodes("//div[@id='itemd']");
      if (itemNodes == null)
      {
          return results;
      }

      // Evaluate the page-wide queries once instead of once per result entry.
      HtmlNodeCollection dataNodes = doc.DocumentNode.SelectNodes("//div[@class='data']");
      HtmlNodeCollection imgNodes = doc.DocumentNode.SelectNodes("//div[@id='listitem']/table/tr/td/div/a/img");

      for (int i = 0; i < itemNodes.Count; i++)
      {
          // itemd > first ul > first li > first h3 holds the title and its link.
          HtmlNode heading = QueryFirstChild(QueryFirstChild(QueryFirstChild(itemNodes[i], "ul"), "li"), "h3");
          HtmlNode link = QueryFirstChild(heading, "a");
          HtmlAttribute href = link == null ? null : link.Attributes["href"];
          if (href == null)
          {
              // Without a link there is no detail URL to offer; skip the entry.
              continue;
          }

          string title = Tools.RemoveInvalidChars(heading.InnerText);
          string itemUrl = "http://" + u.Host + href.Value;

          List<string> actors = new List<string>();
          string company = string.Empty;
          string ticai = string.Empty;

          // The i-th "data" block is assumed to belong to the i-th "itemd" block.
          if (dataNodes != null && i < dataNodes.Count)
          {
              HtmlDocument docData = new HtmlDocument();
              docData.LoadHtml(dataNodes[i].InnerHtml);

              HtmlNodeCollection actorItems = docData.DocumentNode.SelectNodes("//ul[1]/li");
              int k = actorItems == null ? 0 : actorItems.Count;

              // Every li after the first is collected, walking from the last one down
              // to the second (the same order the list was always filled in).
              for (int j = k; j > 1; j--)
              {
                  HtmlNodeCollection matches = docData.DocumentNode.SelectNodes("//ul[1]/li[" + j + "]");
                  if (matches != null)
                  {
                      actors.Add(matches[0].InnerText.Trim());
                  }
              }

              // NOTE(review): the previous code also read //ul[2]/li[2] into a local
              // named n_maker but never used it; Maker has always been filled from
              // ul[3] and Label from ul[4]. Confirm this mapping against the live page.
              company = QueryTrimmedText(docData.DocumentNode.SelectSingleNode("//ul[3]/li[2]"));
              ticai = QueryTrimmedText(docData.DocumentNode.SelectSingleNode("//ul[4]/li[2]"));
          }

          string thumbnail = string.Empty;
          if (imgNodes != null && i < imgNodes.Count)
          {
              HtmlAttribute src = imgNodes[i].Attributes["src"];
              if (src != null)
              {
                  thumbnail = src.Value;
              }
          }

          results.Add(new MovieBasic()
          {
              Title = title,
              ItemURL = itemUrl,
              Img_s = thumbnail,
              Actor = actors,
              Maker = company,
              Label = ticai
          });
      }

      return results;
  }

  // Returns the first direct child of parent with the given tag name, or null
  // when parent is null or has no such child.
  private static HtmlNode QueryFirstChild(HtmlNode parent, string name)
  {
      return parent == null ? null : parent.ChildNodes[name];
  }

  // Returns the node's trimmed inner text, or an empty string for a missing node.
  private static string QueryTrimmedText(HtmlNode node)
  {
      return node == null ? string.Empty : node.InnerText.Trim();
  }
  134. /// <summary>
  135. /// this is the old version.
  136. /// </summary>
  137. /// <param name="basic"></param>
  138. /// <returns></returns>
  139. //public Movie GetMovie(MovieBasic basic)
  140. //{
  141. // try
  142. // {
  143. // string html = wc.GetHTML(new Uri(basic.ItemURL));
  144. // var docc = new HtmlDocument();
  145. // docc.LoadHtml(html);
  146. // //web DOM Changed 2014/12/7
  147. // string Title = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/div[@class='detail_title']/h1").InnerText.Trim();
  148. // //string Title = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/div[@class='detail_title_new']/h1").InnerHtml;
  149. // Title = Tools.RemoveInvalidChars(Title);
  150. // while (Title.IndexOf("&nbsp;") != -1)
  151. // {
  152. // Title= Title.Replace("&nbsp;", "");
  153. // }
  154. // while (Title.IndexOf("廃盤") != -1)
  155. // {
  156. // Title = Title.Replace("廃盤", "");
  157. // }
  158. // //var ddd ="[MIDD-983]Baby Entertainment×MOODYZコラボ作品 淫神の女泥棒 哀しき痙攣の追憶 Dear.F 1 恥辱的、屈辱的なイカせの拷問! 反反复复反反复复方法";
  159. // if (Title.Length>82)
  160. // {
  161. // Title = Title.Substring(0, 81);
  162. // }
  163. // //string Title = basic.Title;
  164. // string label = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[3]/td/a").InnerText.Trim();
  165. // string changjia = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[2]/td/a").InnerText.Trim();
  166. // string jiandu = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[5]/td[2]").InnerText.Trim();
  167. // string date = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[6]/td[2]").InnerText.Trim();
  168. // //2008/01/25 (DVD レンタル版)
  169. // System.Text.RegularExpressions.Regex r = new System.Text.RegularExpressions.Regex(@"\d{4}/\d{2}/\d{2}");
  170. // if (r.IsMatch(date))
  171. // {
  172. // date = r.Match(date).Value;
  173. // }
  174. // else
  175. // {
  176. // date = "1900/01/01";
  177. // }
  178. // DateTime dtime = DateTime.Parse(date);
  179. // string minutes = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[7]/td[2]").InnerText.Trim();
  180. // string f_code = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[8]/td[2]").InnerText.Trim();
  181. // while (f_code.IndexOf("&nbsp;")!=-1)
  182. // {
  183. // f_code = f_code.Replace("&nbsp;", "");
  184. // }
  185. // while (f_code.IndexOf("廃盤")!=-1)
  186. // {
  187. // f_code = f_code.Replace("廃盤", "");
  188. // }
  189. // f_code = Tools.Fcode(f_code);
  190. // string xilie = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[4]/td[2]").InnerText.Trim();
  191. // //f_code = Tools.Fcode(f_code);
  192. // string intro = docc.DocumentNode.SelectSingleNode("//table[@class='item_detail']/tr/td[@class='text']").InnerText.Trim();
  193. // string coverImg = docc.DocumentNode.SelectSingleNode("//table[@class='item_detail']/tr/td/div/a").Attributes["href"].Value.Trim();
  194. // Movie m = new Movie()
  195. // {
  196. // Actor = basic.Actor,
  197. // Title = Title,
  198. // Lable = label,
  199. // Maker = changjia,
  200. // ReleaseDate = dtime,
  201. // Minutes = minutes,
  202. // AVCode = f_code,
  203. // Introduction = intro,
  204. // CoverURL = coverImg,
  205. // ItemURL = basic.ItemURL,
  206. // Series = xilie,
  207. // Producer = jiandu
  208. // };
  209. // //Console.WriteLine(string.Format("Actor:{0}\r\nTitle:{1}\r\nLabel:{2}\r\nMaker:{3}\r\nReleaseDate:{4}\r\n番号:{5}\r\n ", m.Actor[0], m.Title, m.Lable, m.Maker, m.ReleaseDate.ToShortDateString(), m.AVCode));
  210. // return m;
  211. // }
  212. // catch (Exception)
  213. // {
  214. // throw new Exception("获取影片信息失败!");
  215. // }
  216. //}
  /// <summary>
  /// Downloads the detail page at <c>basic.ItemURL</c> and builds a full
  /// <c>Movie</c> record from it.
  /// </summary>
  /// <param name="basic">
  /// Search-result entry; its <c>ItemURL</c> is fetched and its <c>Actor</c>
  /// list is copied into the result unchanged.
  /// </param>
  /// <returns>The populated movie record.</returns>
  /// <exception cref="Exception">
  /// Thrown when the page cannot be downloaded or an expected element is
  /// missing. The underlying failure is available as <c>InnerException</c>.
  /// </exception>
  public Movie GetMovie(MovieBasic basic)
  {
      try
      {
          string html = wc.GetHTML(new Uri(basic.ItemURL));
          var docc = new HtmlDocument();
          docc.LoadHtml(html);

          // The site's DOM changed on 2014/12/7; these paths match the newer layout.
          string title = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/div[@class='detail_title_new']/h1").InnerText.Trim();
          string coverImg = docc.DocumentNode.SelectSingleNode("//*[@id='detail_new']/table/tr/td[1]/table/tr[1]/td[1]/div/a[1]").Attributes["href"].Value.Trim();

          // Rows of the "item" table, as labelled by the original author:
          // 1 actress, 2 maker, 3 label, 4 series, 5 director, 6 release date,
          // 7 running time, 8 product code, 9 tags. Rows 1 and 3 are not used.
          string maker = GetMovieDetailCell(docc, 2);
          string series = GetMovieDetailCell(docc, 4);
          string director = GetMovieDetailCell(docc, 5);
          string releaseDate = GetMovieDetailCell(docc, 6);
          string minutes = GetMovieDetailCell(docc, 7);
          string productCode = GetMovieDetailCell(docc, 8);
          string tags = GetMovieDetailCell(docc, 9);

          // The description block is optional; a missing one becomes an empty string.
          HtmlNode descriptionNode = docc.DocumentNode.SelectSingleNode("//div[@class='item_text']");
          string description = descriptionNode == null ? "" : descriptionNode.InnerText.Trim();

          return new Movie()
          {
              Actor = basic.Actor,
              Title = titleCleaner(title),
              // NOTE(review): Lable is filled from row 9 (tags), not row 3 (label) -
              // kept as found; confirm that this is intended.
              Lable = tags,
              Maker = maker,
              ReleaseDate = dateCleaner(releaseDate),
              Minutes = minutes,
              AVCode = f_codeCleaner(productCode),
              Introduction = description,
              CoverURL = coverImg,
              ItemURL = basic.ItemURL,
              Series = series,
              // Producer carries the director row.
              Producer = director
          };
      }
      catch (Exception ex)
      {
          // Same exception type and message as before, but the root cause is now
          // preserved as InnerException instead of being discarded.
          throw new Exception("获取影片信息失败!", ex);
      }
  }

  // Reads the trimmed text of the second cell in the given (1-based) row of the
  // detail page's "item" table. Throws if the cell does not exist.
  private static string GetMovieDetailCell(HtmlDocument detailDoc, int row)
  {
      return detailDoc.DocumentNode.SelectSingleNode("//table[@class='item']/tr[" + row + "]/td[2]").InnerText.Trim();
  }
  /// <summary>
  /// Cleans a scraped title: passes it through <c>Tools.RemoveInvalidChars</c>,
  /// strips every <c>&amp;nbsp;</c> entity and every "廃盤" marker, and caps
  /// the length.
  /// </summary>
  /// <param name="title">Raw title text taken from the page.</param>
  /// <returns>The cleaned title, at most 82 characters long.</returns>
  private string titleCleaner(string title)
  {
      title = Tools.RemoveInvalidChars(title);

      // IndexOf(string) without a comparison argument is culture-sensitive while
      // Replace(string, string) is ordinal. With ignorable characters in the text
      // the two can disagree and the loop would never end, so the search is forced
      // to ordinal. The loop itself is kept because one removal can join the
      // surrounding text into a new occurrence.
      while (title.IndexOf("&nbsp;", StringComparison.Ordinal) != -1)
      {
          title = title.Replace("&nbsp;", "");
      }
      while (title.IndexOf("廃盤", StringComparison.Ordinal) != -1)
      {
          title = title.Replace("廃盤", "");
      }

      // Very long titles are shortened. Titles longer than 82 characters are cut
      // to 81; that asymmetry is existing behaviour and is kept so that results
      // stay identical to earlier runs.
      if (title.Length > 82)
      {
          title = title.Substring(0, 81);
      }
      return title;
  }
  /// <summary>
  /// Extracts the first <c>yyyy/MM/dd</c> date from a free-text release-date
  /// cell such as "2008/01/25 (DVD レンタル版)".
  /// </summary>
  /// <param name="date">Raw cell text; may contain extra text around the date.</param>
  /// <returns>
  /// The parsed date, or 1900-01-01 when the text is null, contains no date in
  /// that format, or the matched digits do not form a real calendar date.
  /// </returns>
  private DateTime dateCleaner(string date)
  {
      DateTime fallback = new DateTime(1900, 1, 1);
      if (date == null)
      {
          return fallback;
      }

      System.Text.RegularExpressions.Match match =
          System.Text.RegularExpressions.Regex.Match(date, @"\d{4}/\d{2}/\d{2}");

      // Parse with a fixed format and the invariant culture: the site's format
      // never changes, whereas DateTime.Parse would follow the machine's culture
      // and throw on matches like "2008/13/45".
      DateTime parsed;
      if (match.Success &&
          DateTime.TryParseExact(
              match.Value,
              "yyyy/MM/dd",
              System.Globalization.CultureInfo.InvariantCulture,
              System.Globalization.DateTimeStyles.None,
              out parsed))
      {
          return parsed;
      }

      return fallback;
  }
  /// <summary>
  /// Cleans a scraped product code: strips every <c>&amp;nbsp;</c> entity and
  /// every "廃盤" marker, then hands the remainder to <c>Tools.Fcode</c>.
  /// </summary>
  /// <param name="f_code">Raw product-code cell text.</param>
  /// <returns>The value returned by <c>Tools.Fcode</c> for the stripped text.</returns>
  private string f_codeCleaner(string f_code)
  {
      // IndexOf(string) without a comparison argument is culture-sensitive while
      // Replace(string, string) is ordinal. With ignorable characters in the text
      // the two can disagree and the loop would never end, so the search is forced
      // to ordinal.
      while (f_code.IndexOf("&nbsp;", StringComparison.Ordinal) != -1)
      {
          f_code = f_code.Replace("&nbsp;", "");
      }
      while (f_code.IndexOf("廃盤", StringComparison.Ordinal) != -1)
      {
          f_code = f_code.Replace("廃盤", "");
      }

      return Tools.Fcode(f_code);
  }
  /// <summary>
  /// Makes sure the cover image for <paramref name="mo"/> exists on disk as
  /// <c>Cover/&lt;AVCode&gt;.jpg</c> under the application base directory,
  /// downloading it from <c>mo.CoverURL</c> when it is not there yet.
  /// </summary>
  /// <param name="mo">
  /// Movie whose <c>ItemURL</c> (assigned to the client's <c>ReferURL</c>),
  /// <c>CoverURL</c> and <c>AVCode</c> are read. On success its
  /// <c>CoverFile</c> is set to the local path.
  /// </param>
  /// <returns>
  /// <c>true</c> when the file already existed or was downloaded;
  /// <c>false</c> when creating the directory or downloading failed.
  /// </returns>
  public bool GetCover(Movie mo)
  {
      wc.ReferURL = mo.ItemURL;
      FileInfo f = new FileInfo(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Cover", mo.AVCode + ".jpg"));

      bool downloadStarted = false;
      try
      {
          // CreateDirectory does nothing when the directory already exists.
          Directory.CreateDirectory(Path.GetDirectoryName(f.FullName));

          if (!File.Exists(f.FullName))
          {
              downloadStarted = true;
              wc.DownloadFile(mo.CoverURL, f.FullName);
          }
          else
          {
              Console.WriteLine("已有封面 " + mo.Title);
          }
      }
      catch (Exception)
      {
          // A failed download can leave an empty or truncated file behind. Remove
          // it so that the next call retries instead of treating it as a cover.
          if (downloadStarted)
          {
              try
              {
                  File.Delete(f.FullName);
              }
              catch (IOException)
              {
                  // Best-effort cleanup; the failure is reported below either way.
              }
              catch (UnauthorizedAccessException)
              {
                  // Best-effort cleanup; the failure is reported below either way.
              }
          }
          return false;
      }

      mo.CoverFile = f.FullName;
      return true;
  }
  /// <summary>
  /// Builds the arzon.jp item-list search URL for the given search term.
  /// </summary>
  /// <param name="fcode">
  /// Raw (not yet URL-encoded) search term; <c>null</c> is treated as empty.
  /// </param>
  /// <returns>The list URL with the escaped term in its <c>q</c> parameter.</returns>
  Uri urlCombine(string fcode)
  {
      // Escape the term so that characters such as '&', '#', '+' or '=' stay part
      // of the search text instead of altering the query string.
      string term = Uri.EscapeDataString(fcode ?? string.Empty);
      string ur = "http://www.arzon.jp/itemlist.html?t=&m=all&s=&mkt=all&disp=30&sort=-saledate&list=list&q=" + term;
      return new Uri(ur);
  }
  /// <summary>
  /// Creates a new <c>Arzon</c> with its own web client and an independent
  /// deep copy of this instance's cookie container, so the copy does not need
  /// to run <c>CookiesInit</c> again.
  /// </summary>
  /// <returns>
  /// The new instance (typed <c>object</c> as required by <c>ICloneable</c>).
  /// Event subscriptions are not copied.
  /// </returns>
  public object Clone()
  {
      // false: skip the network round trip, the cookies are copied below instead.
      Arzon copy = new Arzon(false);

      if (this.wc.m_container != null)
      {
          // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data.
          // Here it only round-trips an object created in this process, so it is
          // kept as the deep-copy mechanism; replace it when the target framework
          // no longer provides it.
          BinaryFormatter bf = new BinaryFormatter();
          using (MemoryStream ms = new MemoryStream())
          {
              bf.Serialize(ms, this.wc.m_container);
              ms.Seek(0, SeekOrigin.Begin);
              // Direct cast: an unexpected type should fail loudly here rather than
              // silently leave the copy with a null container.
              copy.wc.m_container = (System.Net.CookieContainer)bf.Deserialize(ms);
          }
      }

      // The copy holds the same cookies, so it reports the same init state.
      copy.IsInitCompleted = this.IsInitCompleted;
      return copy;
  }
  /// <summary>
  /// Downloads <paramref name="url"/> and collects every movie shown in a
  /// <c>pictlist</c> block on that page.
  /// </summary>
  /// <param name="url">Absolute URL of the page to scan.</param>
  /// <returns>
  /// One <c>MovieBasic</c> (ItemURL, Img_s, Title) per block that has a
  /// <c>dt/a</c> link. Blocks without such a link are skipped; a missing title
  /// attribute or image yields an empty string. Empty when the page has no
  /// <c>pictlist</c> block.
  /// </returns>
  public List<MovieBasic> FindInURL(string url)
  {
      HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(wc.GetHTML(new Uri(url)));

      List<MovieBasic> list = new List<MovieBasic>();

      // SelectNodes returns null (not an empty collection) when nothing matches.
      HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//div[@class='pictlist']");
      if (nodes == null)
      {
          return list;
      }

      foreach (HtmlNode item in nodes)
      {
          // Look the anchor up once; it carries both the link and the title.
          HtmlNode link = item.SelectSingleNode(".//dt/a");
          HtmlAttribute href = link == null ? null : link.Attributes["href"];
          if (href == null)
          {
              // No detail link: the entry cannot be followed, so leave it out.
              continue;
          }

          HtmlAttribute titleAttr = link.Attributes["title"];
          HtmlNode image = item.SelectSingleNode(".//img[1]");
          HtmlAttribute src = image == null ? null : image.Attributes["src"];

          list.Add(new MovieBasic()
          {
              ItemURL = "http://www.arzon.jp" + href.Value.Trim(),
              Img_s = src == null ? string.Empty : src.Value.Trim(),
              Title = titleAttr == null ? string.Empty : titleAttr.Value.Trim()
          });
      }

      return list;
  }
  394. }
  395. }