Arzon.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using AVSORTER;
  6. using System.Text.RegularExpressions;
  7. using HtmlAgilityPack;
  8. using System.IO;
  9. using System.Runtime.Serialization.Formatters.Binary;
  10. namespace Gets
  11. {
  12. public class Arzon : AVSORTER.IGetable, ICloneable
  13. {
  14. Uri uri;
  15. MyWebClient wc;
  /// <summary>
  /// Creates a scraper instance backed by a new <c>MyWebClient</c>.
  /// </summary>
  /// <param name="IsInit">
  /// When true, <see cref="CookiesInit"/> is called right away; that call needs network
  /// access and takes time. Pass false to skip it and call <see cref="CookiesInit"/>
  /// manually when needed.
  /// </param>
  public Arzon(bool IsInit)
  {
      wc = new MyWebClient();
      if (!IsInit)
      {
          return;
      }

      CookiesInit();
  }
  /// <summary>
  /// Starts an asynchronous request to the site's age-check URL, tagged with the
  /// "init" user state. Completion is handled by <c>wc_DownloadDataCompleted</c>.
  /// </summary>
  /// <remarks>
  /// NOTE(review): presumably the response stores the age-check cookie in the web
  /// client's cookie container - confirm against MyWebClient.
  /// </remarks>
  public void CookiesInit()
  {
      const string url = @"http://www.arzon.jp/index.php?action=adult_customer_agecheck&agecheck=1";
      uri = new Uri(url);

      // Detach before attaching so that calling this method more than once never leaves
      // several subscriptions behind (each one would raise InitCompleted again).
      // Removing a handler that is not subscribed is a no-op.
      wc.DownloadDataCompleted -= wc_DownloadDataCompleted;
      wc.DownloadDataCompleted += wc_DownloadDataCompleted;

      wc.DownloadDataAsync(uri, "init");
  }
  35. public event EventHandler InitCompleted;
  /// <summary>
  /// Handles completion of the web client's asynchronous downloads. Only the request
  /// tagged with the "init" user state is acted upon: when it finished without an
  /// error, <c>IsInitCompleted</c> is set and <c>InitCompleted</c> is raised.
  /// </summary>
  /// <param name="sender">Source of the event; passed by the web client, not used here.</param>
  /// <param name="e">Completion data carrying the user state and any error.</param>
  void wc_DownloadDataCompleted(object sender, System.Net.DownloadDataCompletedEventArgs e)
  {
      if ((e.UserState as string) != "init" || e.Error != null)
      {
          // Either a different request or a failed initialization: nothing to report.
          return;
      }

      this.IsInitCompleted = true;

      // Copy the delegate first: a subscriber detaching on another thread between the
      // null check and the invocation would otherwise cause a NullReferenceException.
      EventHandler handler = this.InitCompleted;
      if (handler != null)
      {
          handler(this, EventArgs.Empty);
      }
  }
  50. public bool IsInitCompleted { get; set; }
  /// <summary>
  /// Builds the search URL for <paramref name="fcode"/> and returns the entries parsed
  /// from the resulting page.
  /// </summary>
  /// <param name="fcode">Search term appended to the site's item-list query.</param>
  /// <returns>The parsed result entries.</returns>
  public List<MovieBasic> Query(string fcode)
  {
      return PageParse(urlCombine(fcode));
  }
  /// <summary>
  /// Downloads the result page at <paramref name="u"/> and converts each listed item
  /// into a <c>MovieBasic</c>.
  /// </summary>
  /// <param name="u">Address of the result page; its host is used to build item URLs.</param>
  /// <returns>
  /// One entry per parsed item. The list is empty when the page has no result container
  /// or when none of the item nodes can be found.
  /// </returns>
  private List<MovieBasic> PageParse(Uri u)
  {
      List<MovieBasic> results = new List<MovieBasic>();

      HtmlAgilityPack.HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(wc.GetHTML(u));
      HtmlNode root = doc.DocumentNode;

      if (root.SelectSingleNode("//div[@class='autopagerize_page_element']") == null)
      {
          // No result container on the page: nothing was found.
          return results;
      }

      // Query each node set once instead of once per loop iteration. SelectNodes yields
      // null (not an empty collection) when nothing matches, so every set is checked.
      var items = root.SelectNodes("//div[@id='itemd']");
      var links = root.SelectNodes("//div[@id='itemd']//h2/a");
      var dataNodes = root.SelectNodes("//div[@class='data']");
      var images = root.SelectNodes("//div[@id='listitem']/table/tr/td/div/a/img");
      if (items == null || links == null || dataNodes == null || images == null)
      {
          return results;
      }

      // The four sets are matched up by position; never index past the shortest one.
      int count = Math.Min(Math.Min(items.Count, links.Count), Math.Min(dataNodes.Count, images.Count));
      for (int i = 0; i < count; i++)
      {
          HtmlNode link = links[i];
          string title = Tools.RemoveInvalidChars(link.InnerText);
          string itemUrl = "http://" + u.Host + link.Attributes["href"].Value;

          // Parse the item's data block as its own document so the queries below only
          // see this item's lists.
          HtmlDocument docData = new HtmlDocument();
          docData.LoadHtml(dataNodes[i].InnerHtml);
          HtmlNode data = docData.DocumentNode;

          // Entries 2..k of the first list are collected as actors, last entry first;
          // entry 1 is skipped.
          List<string> actors = new List<string>();
          var firstListEntries = data.SelectNodes("//ul[1]/li");
          int entryCount = firstListEntries == null ? 0 : firstListEntries.Count;
          for (int j = entryCount; j > 1; j--)
          {
              HtmlNode actorNode = data.SelectSingleNode("//ul[1]/li[" + j + "]");
              if (actorNode != null)
              {
                  actors.Add(actorNode.InnerText.Trim());
              }
          }

          MovieBasic mb = new MovieBasic()
          {
              Title = title,
              ItemURL = itemUrl,
              Img_s = images[i].Attributes["src"].Value,
              Actor = actors,
              // Maker comes from the third list, Label from the fourth; a missing list
              // yields an empty string instead of aborting the whole page.
              Maker = pageParseText(data.SelectSingleNode("//ul[3]/li[2]")),
              Label = pageParseText(data.SelectSingleNode("//ul[4]/li[2]"))
          };
          results.Add(mb);
      }

      return results;
  }

  /// <summary>
  /// Returns the trimmed inner text of <paramref name="node"/>, or an empty string
  /// when the node is null.
  /// </summary>
  private static string pageParseText(HtmlNode node)
  {
      return node == null ? string.Empty : node.InnerText.Trim();
  }
  // Legacy implementation of GetMovie, kept commented out for reference only.
  123. //public Movie GetMovie(MovieBasic basic)
  124. //{
  125. // try
  126. // {
  127. // string html = wc.GetHTML(new Uri(basic.ItemURL));
  128. // var docc = new HtmlDocument();
  129. // docc.LoadHtml(html);
  130. // //web DOM Changed 2014/12/7
  131. // string Title = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/div[@class='detail_title']/h1").InnerText.Trim();
  132. // //string Title = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/div[@class='detail_title_new']/h1").InnerHtml;
  133. // Title = Tools.RemoveInvalidChars(Title);
  134. // while (Title.IndexOf("&nbsp;") != -1)
  135. // {
  136. // Title= Title.Replace("&nbsp;", "");
  137. // }
  138. // while (Title.IndexOf("廃盤") != -1)
  139. // {
  140. // Title = Title.Replace("廃盤", "");
  141. // }
  142. // //var ddd ="[MIDD-983]Baby Entertainment×MOODYZコラボ作品 淫神の女泥棒 哀しき痙攣の追憶 Dear.F 1 恥辱的、屈辱的なイカせの拷問! 反反复复反反复复方法";
  143. // if (Title.Length>82)
  144. // {
  145. // Title = Title.Substring(0, 81);
  146. // }
  147. // //string Title = basic.Title;
  148. // string label = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[3]/td/a").InnerText.Trim();
  149. // string changjia = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[2]/td/a").InnerText.Trim();
  150. // string jiandu = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[5]/td[2]").InnerText.Trim();
  151. // string date = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[6]/td[2]").InnerText.Trim();
  152. // //2008/01/25 (DVD レンタル版)
  153. // System.Text.RegularExpressions.Regex r = new System.Text.RegularExpressions.Regex(@"\d{4}/\d{2}/\d{2}");
  154. // if (r.IsMatch(date))
  155. // {
  156. // date = r.Match(date).Value;
  157. // }
  158. // else
  159. // {
  160. // date = "1900/01/01";
  161. // }
  162. // DateTime dtime = DateTime.Parse(date);
  163. // string minutes = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[7]/td[2]").InnerText.Trim();
  164. // string f_code = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[8]/td[2]").InnerText.Trim();
  165. // while (f_code.IndexOf("&nbsp;")!=-1)
  166. // {
  167. // f_code = f_code.Replace("&nbsp;", "");
  168. // }
  169. // while (f_code.IndexOf("廃盤")!=-1)
  170. // {
  171. // f_code = f_code.Replace("廃盤", "");
  172. // }
  173. // f_code = Tools.Fcode(f_code);
  174. // string xilie = docc.DocumentNode.SelectSingleNode("//div[@id='detail_new']/table/tr/td/table[@class='item_detail']/tr/td[@class='caption']/table[@class='item']/tr[4]/td[2]").InnerText.Trim();
  175. // //f_code = Tools.Fcode(f_code);
  176. // string intro = docc.DocumentNode.SelectSingleNode("//table[@class='item_detail']/tr/td[@class='text']").InnerText.Trim();
  177. // string coverImg = docc.DocumentNode.SelectSingleNode("//table[@class='item_detail']/tr/td/div/a").Attributes["href"].Value.Trim();
  178. // Movie m = new Movie()
  179. // {
  180. // Actor = basic.Actor,
  181. // Title = Title,
  182. // Lable = label,
  183. // Maker = changjia,
  184. // ReleaseDate = dtime,
  185. // Minutes = minutes,
  186. // AVCode = f_code,
  187. // Introduction = intro,
  188. // CoverURL = coverImg,
  189. // ItemURL = basic.ItemURL,
  190. // Series = xilie,
  191. // Producer = jiandu
  192. // };
  193. // //Console.WriteLine(string.Format("Actor:{0}\r\nTitle:{1}\r\nLabel:{2}\r\nMaker:{3}\r\nReleaseDate:{4}\r\n番号:{5}\r\n ", m.Actor[0], m.Title, m.Lable, m.Maker, m.ReleaseDate.ToShortDateString(), m.AVCode));
  194. // return m;
  195. // }
  196. // catch (Exception)
  197. // {
  198. // throw new Exception("获取影片信息失败!");
  199. // }
  200. //}
  /// <summary>
  /// Downloads the detail page referenced by <c>basic.ItemURL</c> and builds a
  /// <c>Movie</c> from its title, cover image, detail table and description.
  /// </summary>
  /// <param name="basic">Entry whose <c>ItemURL</c> points at the detail page.</param>
  /// <returns>The populated movie.</returns>
  /// <exception cref="Exception">
  /// Thrown when the page cannot be downloaded or parsed. The original failure is
  /// available through <c>InnerException</c>.
  /// </exception>
  public Movie GetMovie(MovieBasic basic)
  {
      try
      {
          var docc = new HtmlDocument();
          docc.LoadHtml(wc.GetHTML(new Uri(basic.ItemURL)));
          HtmlNode root = docc.DocumentNode;

          // web DOM changed 2014/12/7
          string title = root.SelectSingleNode("//div[@id='detail_new']//div[@class='detail_title_new2']//h1").InnerText;
          string coverImg = root.SelectNodes("//div[@id='detail_new']//img[@class='item_img']")[0].Attributes["src"].Value;
          if (coverImg.StartsWith("//", StringComparison.Ordinal))
          {
              // Protocol-relative address: give it an explicit scheme.
              coverImg = "http:" + coverImg;
          }

          // Rows of the "item" table are read by position. Row 3 (AV label) is not
          // used by the resulting Movie and is therefore not read.
          // Row 1: actresses, separated by whitespace/newlines.
          List<string> actors = detailCellText(root, 1)
              .Split(new char[] { '\r', ' ', '\n' }, StringSplitOptions.RemoveEmptyEntries)
              .ToList<string>();
          string maker = detailCellText(root, 2);        // row 2: maker
          string series = detailCellText(root, 4);       // row 4: series
          string director = detailCellText(root, 5);     // row 5: director
          string releaseDate = detailCellText(root, 6);  // row 6: release date
          string minutes = detailCellText(root, 7);      // row 7: running time
          string productCode = detailCellText(root, 8);  // row 8: product code
          string tags = detailCellText(root, 9);         // row 9: tags

          // The description block is optional; a page without it yields an empty text.
          HtmlNode descriptionNode = root.SelectSingleNode("//div[@class='item_text']");
          string description = descriptionNode == null ? "" : descriptionNode.InnerText.Trim();

          Movie m = new Movie()
          {
              Actor = actors,
              Title = titleCleaner(title),
              Lable = tags,
              Maker = maker,
              ReleaseDate = dateCleaner(releaseDate),
              Minutes = minutes,
              AVCode = f_codeCleaner(productCode),
              Introduction = description,
              CoverURL = coverImg,
              ItemURL = basic.ItemURL,
              Series = series,
              // director
              Producer = director
          };
          return m;
      }
      catch (Exception ex)
      {
          // Same exception type and message as before, but the root cause is preserved.
          throw new Exception("获取影片信息失败!", ex);
      }
  }

  /// <summary>
  /// Returns the trimmed text of the second cell in row <paramref name="row"/> of the
  /// detail page's "item" table; throws when that cell does not exist.
  /// </summary>
  private static string detailCellText(HtmlNode root, int row)
  {
      HtmlNode cell = root.SelectSingleNode("//table[@class='item']/tr[" + row + "]/td[2]");
      if (cell == null)
      {
          throw new InvalidOperationException("Detail table row " + row + " was not found on the page.");
      }

      return cell.InnerText.Trim();
  }
  /// <summary>
  /// Cleans a scraped title: passes it through <c>Tools.RemoveInvalidChars</c>, removes
  /// every "&amp;nbsp;" entity and every "廃盤" marker, then shortens over-long titles.
  /// </summary>
  /// <param name="title">Raw title text taken from the page.</param>
  /// <returns>The cleaned title.</returns>
  private string titleCleaner(string title)
  {
      title = Tools.RemoveInvalidChars(title);

      // The search must be ordinal, like Replace: a culture-sensitive IndexOf can report
      // a match (e.g. across ignorable characters) that the ordinal Replace then fails
      // to remove, which would make these loops spin forever.
      while (title.IndexOf("&nbsp;", StringComparison.Ordinal) != -1)
      {
          title = title.Replace("&nbsp;", "");
      }
      while (title.IndexOf("廃盤", StringComparison.Ordinal) != -1)
      {
          title = title.Replace("廃盤", "");
      }

      // Titles longer than 82 characters are reduced to their first 81 characters.
      if (title.Length > 82)
      {
          title = title.Substring(0, 81);
      }
      return title;
  }
  /// <summary>
  /// Extracts the first date written as yyyy/MM/dd from <paramref name="date"/>.
  /// </summary>
  /// <param name="date">Text that may contain a date, e.g. "2008/01/25 (DVD ...)".</param>
  /// <returns>
  /// The extracted date, or 1900/01/01 when the text is null, contains no such pattern,
  /// or the matched digits do not form a valid calendar date.
  /// </returns>
  private DateTime dateCleaner(string date)
  {
      DateTime fallback = new DateTime(1900, 1, 1);
      if (date == null)
      {
          return fallback;
      }

      System.Text.RegularExpressions.Match match =
          System.Text.RegularExpressions.Regex.Match(date, @"\d{4}/\d{2}/\d{2}");
      if (!match.Success)
      {
          return fallback;
      }

      // Parse with a fixed format and the invariant culture: the result must not depend
      // on the machine's regional settings, and a pattern match such as "2008/13/45"
      // must fall back instead of throwing.
      DateTime parsed;
      bool valid = DateTime.TryParseExact(
          match.Value,
          "yyyy/MM/dd",
          System.Globalization.CultureInfo.InvariantCulture,
          System.Globalization.DateTimeStyles.None,
          out parsed);
      return valid ? parsed : fallback;
  }
  /// <summary>
  /// Cleans a scraped product code: removes every "&amp;nbsp;" entity and every "廃盤"
  /// marker, then passes the result through <c>Tools.Fcode</c>.
  /// </summary>
  /// <param name="f_code">Raw product-code text taken from the page.</param>
  /// <returns>The value returned by <c>Tools.Fcode</c> for the cleaned text.</returns>
  private string f_codeCleaner(string f_code)
  {
      // The search must be ordinal, like Replace: a culture-sensitive IndexOf can report
      // a match (e.g. across ignorable characters) that the ordinal Replace then fails
      // to remove, which would make these loops spin forever.
      while (f_code.IndexOf("&nbsp;", StringComparison.Ordinal) != -1)
      {
          f_code = f_code.Replace("&nbsp;", "");
      }
      while (f_code.IndexOf("廃盤", StringComparison.Ordinal) != -1)
      {
          f_code = f_code.Replace("廃盤", "");
      }

      return Tools.Fcode(f_code);
  }
  /// <summary>
  /// Makes sure the cover image of <paramref name="mo"/> exists as
  /// "Cover/&lt;AVCode&gt;.jpg" under the application's base directory, downloading it
  /// from <c>mo.CoverURL</c> when the file is not there yet.
  /// </summary>
  /// <param name="mo">Movie whose cover is wanted; its <c>CoverFile</c> is set on success.</param>
  /// <returns>
  /// true when the cover file is available (already present or downloaded);
  /// false when building the path, creating the directory or downloading failed.
  /// </returns>
  public bool GetCover(Movie mo)
  {
      wc.ReferURL = mo.ItemURL;

      string coverPath = null;
      bool downloadStarted = false;
      try
      {
          // Built inside the try block: an AVCode containing characters that are
          // invalid in a path must result in "false", not in an exception.
          coverPath = Path.GetFullPath(
              Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Cover", mo.AVCode + ".jpg"));

          // CreateDirectory does nothing when the directory already exists.
          Directory.CreateDirectory(Path.GetDirectoryName(coverPath));

          if (File.Exists(coverPath))
          {
              Console.WriteLine("已有封面 " + mo.Title);
          }
          else
          {
              downloadStarted = true;
              wc.DownloadFile(mo.CoverURL, coverPath);
          }
      }
      catch (Exception)
      {
          if (downloadStarted)
          {
              // Best effort: remove whatever a failed download left behind, otherwise
              // the next call would find the file and report the cover as present.
              try
              {
                  if (File.Exists(coverPath))
                  {
                      File.Delete(coverPath);
                  }
              }
              catch (IOException)
              {
              }
              catch (UnauthorizedAccessException)
              {
              }
          }
          return false;
      }

      mo.CoverFile = coverPath;
      return true;
  }
  /// <summary>
  /// Builds the item-list search URL for <paramref name="fcode"/>.
  /// </summary>
  /// <param name="fcode">Search term; null is treated as an empty term.</param>
  /// <returns>The search URL with the term as the value of the <c>q</c> parameter.</returns>
  Uri urlCombine(string fcode)
  {
      // Escape the term so characters such as '&', '#', '+' or '=' stay part of the
      // value of "q" instead of altering the query string.
      string term = Uri.EscapeDataString(fcode ?? string.Empty);
      string ur = "http://www.arzon.jp/itemlist.html?t=&m=all&s=&mkt=all&disp=30&sort=-saledate&list=list&q=" + term;
      return new Uri(ur);
  }
  /// <summary>
  /// Creates a new <c>Arzon</c> (constructed without cookie initialization) whose web
  /// client holds a serialized-and-deserialized copy of this instance's cookie container.
  /// </summary>
  /// <returns>The new instance, typed as <c>object</c> as required by ICloneable.</returns>
  public object Clone()
  {
      Arzon ar = new Arzon(false);

      // The copy carries the same cookies, so it also carries the same "initialized" state.
      ar.IsInitCompleted = this.IsInitCompleted;

      // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input. Here it
      // only round-trips this instance's own container in memory, but it should be
      // replaced when the project moves to a runtime that no longer ships it.
      using (MemoryStream ms = new MemoryStream())
      {
          BinaryFormatter bf = new BinaryFormatter();
          bf.Serialize(ms, this.wc.m_container);
          ms.Seek(0, SeekOrigin.Begin);

          // Direct cast: an unexpected type should fail here, not silently become null.
          ar.wc.m_container = (System.Net.CookieContainer)bf.Deserialize(ms);
      }

      return ar;
  }
  /// <summary>
  /// Downloads the page at <paramref name="url"/> and returns one <c>MovieBasic</c>
  /// (item URL, thumbnail, title) for every "pictlist" entry on it.
  /// </summary>
  /// <param name="url">Address of the page to scan.</param>
  /// <returns>
  /// The entries found; empty when the page has none. Entries without a link are
  /// skipped; a missing title or image yields an empty string for that field.
  /// </returns>
  public List<MovieBasic> FindInURL(string url)
  {
      HtmlAgilityPack.HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(wc.GetHTML(new Uri(url)));

      List<MovieBasic> list = new List<MovieBasic>();
      var nodes = doc.DocumentNode.SelectNodes("//div[@class='pictlist']");
      if (nodes == null)
      {
          return list;
      }

      foreach (var item in nodes)
      {
          HtmlNode link = item.SelectSingleNode(".//dt/a");
          if (link == null || link.Attributes["href"] == null)
          {
              // No link means no item URL to follow; skip this entry instead of
              // failing the whole page.
              continue;
          }

          list.Add(new MovieBasic()
          {
              ItemURL = "http://www.arzon.jp" + link.Attributes["href"].Value.Trim(),
              Img_s = pictAttribute(item.SelectSingleNode(".//img[1]"), "src"),
              Title = pictAttribute(link, "title")
          });
      }
      return list;
  }

  /// <summary>
  /// Returns the trimmed value of attribute <paramref name="name"/> on
  /// <paramref name="node"/>, or an empty string when the node or attribute is missing.
  /// </summary>
  private static string pictAttribute(HtmlNode node, string name)
  {
      if (node == null || node.Attributes[name] == null)
      {
          return string.Empty;
      }

      return node.Attributes[name].Value.Trim();
  }
  383. }
  384. }