using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using HtmlAgilityPack;
namespace Web
{
/// <summary>
/// Small helper for downloading web pages and extracting information about
/// the hyperlinks (<c>&lt;a href="..."&gt;</c> elements) they contain.
/// </summary>
class Scraper
{
    // XPath selecting every anchor element that carries an href attribute.
    private const string AnchorWithHrefXPath = "//a[@href]";

    public Scraper()
    {
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns its content as a string.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The downloaded content.</returns>
    public string DownloadWebPage(string url)
    {
        // WebClient is IDisposable; the using block releases it even if the download throws.
        using (WebClient client = new WebClient())
        {
            return client.DownloadString(url);
        }
    }

    /// <summary>
    /// Loads the page at <paramref name="url"/> and returns the inner text of every
    /// anchor element that has an <c>href</c> attribute.
    /// </summary>
    /// <param name="url">Address of the page to load and parse.</param>
    /// <returns>
    /// One entry per matching anchor, in the order the nodes are returned by the XPath
    /// query; an empty list when the page contains no matching anchors.
    /// </returns>
    public List<string> ParseLinkText(string url)
    {
        return CollectFromLinks(url, link => link.InnerText);
    }

    /// <summary>
    /// Loads the page at <paramref name="url"/> and returns the <c>href</c> attribute
    /// value of every anchor element that has one.
    /// </summary>
    /// <param name="url">Address of the page to load and parse.</param>
    /// <returns>
    /// One entry per matching anchor, in the order the nodes are returned by the XPath
    /// query; an empty list when the page contains no matching anchors.
    /// </returns>
    public List<string> ParseLinkValue(string url)
    {
        // The second argument is the fallback used when the attribute is absent. The XPath
        // already restricts matches to anchors with an href, so it is not expected to be hit;
        // it is kept unchanged to preserve the original behavior.
        return CollectFromLinks(url, link => link.GetAttributeValue("href", "href"));
    }

    /// <summary>
    /// Loads the page at <paramref name="url"/>, selects all anchors with an <c>href</c>
    /// attribute and projects each of them through <paramref name="selector"/>.
    /// </summary>
    private static List<string> CollectFromLinks(string url, Func<HtmlNode, string> selector)
    {
        HtmlWeb web = new HtmlWeb();
        HtmlDocument document = web.Load(url);

        // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing
        // matches, so iterating it directly would throw a NullReferenceException on pages
        // without links.
        HtmlNodeCollection links = document.DocumentNode.SelectNodes(AnchorWithHrefXPath);
        if (links == null)
        {
            return new List<string>();
        }

        return links.Select(selector).ToList();
    }
}
}