Extract All Email Addresses from a URL and Save Them in a Text File Using ASP.NET (C#)
Today I am going to discuss how we can extract all the email IDs present on the page at a particular URL using ASP.NET (C#).
Create a form design like the one given below.
<!-- Web form for the e-mail extractor: a URL text box, a Search button and a status label. -->
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>Extract Email From URL</title>
</head>
<body>
<!-- Server-side form hosting the ASP.NET controls below. -->
<form id="form1" runat="server">
<div>
<h1>Extract Email By URL ~~ By Vishal Ranjan</h1>
<br />
<br />
<br />
<!-- Caption and input box for the URL to scan. -->
<asp:Label ID="Label1" runat="server" Font-Bold="True" Text="Enter URL:"></asp:Label>
<asp:TextBox ID="TextBox1" runat="server"></asp:TextBox><br />
<br />
<br />
<br />
<br />
<!-- Clicking Search invokes the Button1_Click handler. -->
<asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="Search" /><br />
<br />
<br />
<br />
<!-- Status label rendered in bold red; it has no initial text. -->
<asp:Label ID="lblMsg" runat="server" Font-Bold="True" ForeColor="Red"></asp:Label></div>
</form>
</body>
</html>
Then create a class file to extract the emails as follows. Name the class file GetEmails.cs.
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace FindAllEmails
{
    /// <summary>
    /// Downloads a web page and appends every e-mail address found in its
    /// content to a log file.
    /// </summary>
    public class GetEmails
    {
        // Absolute path of the log file. The previous literal "D:matchlog.txt"
        // was drive-relative (resolved against the current directory of drive D),
        // which did not match the documented location D:\matchlog.txt.
        private const string LogFilePath = @"D:\matchlog.txt";

        // HTTP request timeout in milliseconds.
        private const int RequestTimeoutMs = 10000;

        // E-mail pattern: a local part, '@', then either four dot-separated
        // numeric groups or dot-separated labels ending in 2-4 letters.
        private const string EmailPattern =
            @"(([\w-]+\.)+[\w-]+|([a-zA-Z]{1}|[\w-]{2,}))@"
            + @"((([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\."
            + @"([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])){1}|"
            + @"([a-zA-Z]+[\w-]+\.)+[a-zA-Z]{2,4})";

        // Built once and shared; Regex instances are safe to reuse for matching.
        private static readonly Regex EmailRegex = new Regex(EmailPattern, RegexOptions.IgnoreCase);

        /// <summary>
        /// Public entry point: fetches <paramref name="webPage"/> and logs every
        /// e-mail address found in the returned content.
        /// </summary>
        /// <param name="webPage">Absolute HTTP/HTTPS URL of the page to scan.</param>
        /// <remarks>
        /// Exceptions raised while creating the request, fetching the page or
        /// writing the log file propagate to the caller unchanged.
        /// </remarks>
        public void RetrieveEmails(string webPage)
        {
            GetAllEmails(RetrieveContent(webPage));
        }

        /// <summary>
        /// Gets the content of the web page passed in.
        /// </summary>
        /// <param name="webPage">URL of the page to download.</param>
        /// <returns>The full response body as a string.</returns>
        private string RetrieveContent(string webPage)
        {
            // Create a request object using the URL passed in.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webPage);
            request.Timeout = RequestTimeoutMs;

            // 'using' disposes the response and reader on every path. The old
            // finally block dereferenced both unconditionally and threw a
            // NullReferenceException (hiding the real error) when the request
            // itself failed. No catch is needed: the former 'throw ex;' only
            // reset the stack trace.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader respStream = new StreamReader(response.GetResponseStream()))
            {
                return respStream.ReadToEnd();
            }
        }

        /// <summary>
        /// Using a regular expression, finds all e-mail addresses in the
        /// content of the page and appends each one to the log file.
        /// </summary>
        /// <param name="content">Text of the downloaded page.</param>
        private void GetAllEmails(string content)
        {
            // Walk the matches one by one.
            for (Match match = EmailRegex.Match(content); match.Success; match = match.NextMatch())
            {
                string email = match.Groups[0].Value;

                // Label corrected from "href match": these are e-mail matches.
                Console.WriteLine("Email match: " + email);
                WriteToLog(LogFilePath, "Email match: " + email + Environment.NewLine);
            }
        }

        /// <summary>
        /// Appends a timestamped message to a log file.
        /// </summary>
        /// <param name="file">Path of the file to append to.</param>
        /// <param name="message">Text to write after the timestamp.</param>
        private void WriteToLog(string file, string message)
        {
            // The using block closes the writer; no explicit Close() is required.
            using (StreamWriter w = File.AppendText(file))
            {
                w.WriteLine(DateTime.Now.ToString() + ": " + message);
            }
        }
    }
}
Finally, in the code-behind, create an object of that class by referencing the corresponding namespace.
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using FindAllEmails;
/// <summary>
/// Code-behind for the e-mail extraction page: reads the URL entered by the
/// user, runs the extractor and reports the outcome in the status label.
/// </summary>
public partial class ExtractEmail : System.Web.UI.Page
{
    /// <summary>
    /// Page load handler; intentionally empty.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Handles the Search button click: passes the trimmed text of TextBox1 to
    /// <see cref="GetEmails.RetrieveEmails"/> and shows a success or error
    /// message in lblMsg.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        try
        {
            string url = TextBox1.Text.Trim();
            GetEmails ge = new GetEmails();
            ge.RetrieveEmails(url);
            TextBox1.Text = "";
            lblMsg.Text = @"Emails Retreived and Saved to D:\matchlog.txt";
        }
        catch (Exception ex)
        {
            // Label.Text is rendered without encoding, and exception messages can
            // echo parts of the user-supplied URL, so HTML-encode before display.
            lblMsg.Text = "The following Error Just Occured: " + HttpUtility.HtmlEncode(ex.Message);
        }
    }
}
Once you have done this, we are all set to go. Just enter the URL, including the http:// prefix, in the text box and click the Search button. All the emails present on the associated page will be copied to the file matchlog.txt on the D drive. You can change the path of the file to wherever you want.
This program can be used to create a web service that extracts all emails from a page, builds a database of those emails, and then automatically sends messages to them at scheduled intervals. Just a thought. :-)