Coder Profile - Show off your skills, get a coder profile.
 
 
 
code pin board Basic Web Scraper Download Source Code
Author Details Code Information
shadytyrant ( Travus )

Pinned 5 Codes
Posted 5 Coding Articles

Send A Message
View Coders Profile
Language C# (C Sharp)
Expires Never
Length 1,590 Characters (55 Lines)
Password no password
Description

Basic web scraper class that downloads web pages and parses links.
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.IO;
  6. using System.Net;
  7. using HtmlAgilityPack;
  8.  
  namespace Web
  {
      /// <summary>
      /// Basic web scraper: downloads a page as text and extracts the text or
      /// the <c>href</c> value of every anchor on a page.
      /// </summary>
      class Scraper
      {
          // XPath matching every <a> element that carries an href attribute.
          private const string AnchorXPath = "//a[@href]";

          public Scraper()
          {
          }

          /// <summary>
          /// Downloads the resource at <paramref name="url"/> and returns it as a string.
          /// </summary>
          /// <param name="url">Address of the page to download.</param>
          /// <returns>The content returned by <see cref="WebClient.DownloadString(string)"/>.</returns>
          public string DownloadWebPage(string url)
          {
              // WebClient is disposable; the using block releases it even if the download throws.
              using (WebClient client = new WebClient())
              {
                  return client.DownloadString(url);
              }
          }

          /// <summary>
          /// Loads the page at <paramref name="url"/> and collects the inner text of
          /// every anchor that has an <c>href</c> attribute.
          /// </summary>
          /// <param name="url">Address of the page to load.</param>
          /// <returns>
          /// One entry per matching anchor, in the order the nodes are returned;
          /// an empty list when the page contains no such anchors.
          /// </returns>
          public List<string> ParseLinkText(string url)
          {
              return SelectAnchors(url)
                  .Select(link => link.InnerText)
                  .ToList();
          }

          /// <summary>
          /// Loads the page at <paramref name="url"/> and collects the <c>href</c>
          /// value of every anchor that has an <c>href</c> attribute.
          /// </summary>
          /// <param name="url">Address of the page to load.</param>
          /// <returns>
          /// One entry per matching anchor, in the order the nodes are returned;
          /// an empty list when the page contains no such anchors.
          /// </returns>
          public List<string> ParseLinkValue(string url)
          {
              // The second argument is the fallback used when the attribute is absent;
              // the XPath only selects anchors that have href, so it is kept as-is.
              return SelectAnchors(url)
                  .Select(link => link.GetAttributeValue("href", "href"))
                  .ToList();
          }

          /// <summary>
          /// Loads the document at <paramref name="url"/> and returns its anchors
          /// that have an <c>href</c> attribute.
          /// </summary>
          /// <param name="url">Address of the page to load.</param>
          /// <returns>The matching nodes, or an empty sequence when there are none.</returns>
          private static IEnumerable<HtmlNode> SelectAnchors(string url)
          {
              HtmlWeb web = new HtmlWeb();
              HtmlDocument document = web.Load(url);
              HtmlNodeCollection anchors = document.DocumentNode.SelectNodes(AnchorXPath);

              // SelectNodes yields null (not an empty collection) when nothing matches;
              // normalise that so callers can enumerate without a NullReferenceException.
              if (anchors == null)
              {
                  return Enumerable.Empty<HtmlNode>();
              }

              return anchors;
          }
      }
  }
code pin board Back To Code Pin Board Post New Code
Please log in to post comments.
Page 1 of 1
 
 
Latest News About Coder Profile
Coder Profile Poll
Why do you get bored with programming?

Not enough time to do something productive
I run out of ideas
Too hard to show people my creations
Everything I do has too many errors, and it's too hard
I don't get bored!!!


please login to cast your vote
and see the results of this poll
Latest Coder Profile Changes
Coder Profile was last updated
3.49 Years Ago
Official Blog :: Make A Donation :: Credits :: Contact Me
Terms & Conditions :: Privacy Policy :: Documents :: Wallpapers
Version 1.46.00
Copyright © 2007 - 2012, Scott Thompson, All Rights Reserved