Loading ...

C# Parse Meta Tags | CodeAsp.Net

C# Parse Meta Tags

Rating: 0/5 (0 votes)

CODE SNIPPET - C# Parse Meta Tags

You may come across a situation in your C# and ASP.NET programming where you need to download an external web page and parse its meta tags: specifically, the title, the meta description, and the meta keywords.

The method below will show you how to:

   - download an external webpage
   - parse the meta title
   - parse the meta description
   - parse the meta keywords

The parsing is done using regular expressions.

NOTE: This may not be the best way of doing this, because regular expressions only recognize tags written in the exact form the patterns expect, but it is a solution that you can use.

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;

namespace Tim.Examples.Classes
{
    /// <summary>
    /// Downloads a web page over HTTP and extracts its title, meta keywords and
    /// meta description using regular expressions.
    /// </summary>
    /// <remarks>
    /// The meta patterns only recognize tags written with double-quoted
    /// attributes in the order <c>name</c> then <c>content</c>; tags written
    /// differently are reported as empty strings.
    /// </remarks>
    public class WebMetaData
    {
        // The patterns never change, so they are built once and shared by all
        // instances instead of being re-parsed on every call.
        private static readonly Regex TitleRegex = new Regex(
            "<title\\b[^>]*>([^<]*)</title>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // The value capture stops at the closing quote, and anything up to the
        // end of the tag is tolerated, so self-closing tags (" />") and tags
        // with extra trailing attributes are matched without over-capturing.
        private static readonly Regex KeywordsRegex = new Regex(
            "<meta\\s+name=\"keywords\"\\s+content=\"([^\"]*)\"[^>]*>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private static readonly Regex DescriptionRegex = new Regex(
            "<meta\\s+name=\"description\"\\s+content=\"([^\"]*)\"[^>]*>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Text of the page's title element. Null until <see cref="GetMetaTags"/>
        /// succeeds; empty when the page has no matching title.
        /// </summary>
        public string metaTitle;

        /// <summary>
        /// Content of the "description" meta tag. Null until
        /// <see cref="GetMetaTags"/> succeeds; empty when no matching tag exists.
        /// </summary>
        public string metaDescription;

        /// <summary>
        /// Content of the "keywords" meta tag. Null until
        /// <see cref="GetMetaTags"/> succeeds; empty when no matching tag exists.
        /// </summary>
        public string metaKeywords;

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and fills
        /// <see cref="metaTitle"/>, <see cref="metaDescription"/> and
        /// <see cref="metaKeywords"/> from its HTML.
        /// </summary>
        /// <param name="url">Absolute HTTP or HTTPS address of the page.</param>
        /// <returns>
        /// True when the page was downloaded and parsed; false when the address
        /// is null, malformed or not HTTP(S), or when the download fails. The
        /// fields are left untouched on failure.
        /// </returns>
        public bool GetMetaTags(string url)
        {
            try
            {
                string html = AcquireHTML(url);
                return GetMeta(html);
            }
            catch (Exception)
            {
                // This is a best-effort API: every failure (bad address, network
                // error, timeout) is reported through the return value rather
                // than by throwing. Add logging here if the cause matters.
                return false;
            }
        }

        /// <summary>
        /// Fetches the body of the page at <paramref name="address"/> as text.
        /// </summary>
        /// <param name="address">Absolute HTTP or HTTPS address.</param>
        /// <returns>
        /// The response body, or an empty string when no HTTP response is
        /// available.
        /// </returns>
        /// <exception cref="NotSupportedException">
        /// The address does not use an HTTP scheme.
        /// </exception>
        /// <remarks>
        /// Address and network errors raised by the framework are allowed to
        /// propagate; <see cref="GetMetaTags"/> turns them into a false result.
        /// </remarks>
        private string AcquireHTML(string address)
        {
            HttpWebRequest request = WebRequest.Create(address) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create yields other request types for schemes such
                // as file:// or ftp://, which this class does not handle.
                throw new NotSupportedException("Only HTTP and HTTPS addresses are supported.");
            }

            request.UserAgent = "your-search-bot";
            request.KeepAlive = false;
            request.Timeout = 10 * 1000; // milliseconds

            // The using blocks release the connection and the reader on every
            // path, including when reading the body throws.
            using (WebResponse rawResponse = request.GetResponse())
            {
                HttpWebResponse response = rawResponse as HttpWebResponse;
                if (!request.HaveResponse || response == null)
                {
                    return "";
                }

                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Extracts the title, keywords and description from
        /// <paramref name="strIn"/> into the public fields.
        /// </summary>
        /// <param name="strIn">HTML source of the page.</param>
        /// <returns>
        /// True once the fields have been assigned (an unmatched tag yields an
        /// empty string); false when <paramref name="strIn"/> is null.
        /// </returns>
        private bool GetMeta(string strIn)
        {
            if (strIn == null)
            {
                return false;
            }

            // Groups[1].Value is the empty string when the pattern did not match.
            metaTitle = TitleRegex.Match(strIn).Groups[1].Value;
            metaKeywords = KeywordsRegex.Match(strIn).Groups[1].Value;
            metaDescription = DescriptionRegex.Match(strIn).Groups[1].Value;

            return true;
        }

    }
}

Comments (no comments yet)

Top Posts