Posts Tagged facebook

Facebook Like Link Sharing (Screen Scraping) in ASP.NET

As part of our latest project, www.fitjunction.com, I implemented Facebook- and Digg-style link sharing (screen scraping) in ASP.NET, so I thought it would be a good idea to share it with the world. Here are the steps:

Step 1: Capture the page HTML using the WebRequest and WebResponse objects:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
''' <summary>
''' Requests the given URL and returns the response body as a string.
''' </summary>
''' <param name="strURL">Address of the page to fetch.</param>
''' <returns>
''' The full response body, or an empty string when the request fails for any
''' reason (invalid URL, network error, non-success status, ...).
''' </returns>
Public Function GetHtmlPage(ByVal strURL As String) As String
    Try
        Dim request As WebRequest = WebRequest.Create(strURL)

        ' Dispose the response itself, not only the reader, so the underlying
        ' connection is released even if obtaining or reading the stream fails.
        Using response As WebResponse = request.GetResponse()
            ' End Using closes the reader (and its stream); no explicit Close is needed.
            Using sr As New StreamReader(response.GetResponseStream())
                Return sr.ReadToEnd()
            End Using
        End Using

    Catch ex As Exception
        ' Deliberate best-effort: callers treat an empty string as "page could not be fetched".
        Return ""
    End Try
End Function

The function above captures the page HTML using the web request object; you can then parse that HTML to get the page title and images.

Step 2: Get the page title from the HTML using regular expressions:

1
2
3
4
5
6
7
8
9
10
11
12
13
''' <summary>
''' Extracts the text of the first title element found in an HTML document.
''' </summary>
''' <param name="content">Raw HTML of the page; may be Nothing or empty.</param>
''' <returns>
''' The title text with surrounding whitespace removed, or an empty string when
''' the input is empty or contains no title element.
''' </returns>
Private Function GetTitle(ByVal content As String) As String
    If String.IsNullOrEmpty(content) Then
        Return ""
    End If

    ' [^>]* keeps the opening-tag match inside a single tag, and the lazy [\s\S]*?
    ' stops at the FIRST closing tag. A greedy match would run on to the last
    ' </title> in the document (for example one inside an inline SVG).
    Dim pattern As String = "<title\b[^>]*>([\s\S]*?)</title\s*>"

    ' HTML tag names are case-insensitive, so <TITLE> must match as well.
    Dim titleMatch As Match = Regex.Match(content, pattern, RegexOptions.IgnoreCase)

    If Not titleMatch.Success Then
        Return ""
    End If

    Return titleMatch.Groups(1).Value.Trim()
End Function

The above function will parse the page HTML and return the page title.

Step 3: Get Image Links mentioned in HTML:

Now the next step is to parse the HTML using regular expressions and get all image tags. The following function also checks for relative image links and prefixes them with the host name. After all the images are found, they are downloaded to a temporary folder so that their width and height can be determined. Only images within a predefined width and height range are added to the returned data table; the rest are ignored.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
''' <summary>
''' Finds image sources in the given HTML, downloads each one into the
''' application's temporary image folder and returns those whose dimensions
''' pass the size filter.
''' </summary>
''' <param name="content">Raw HTML of the page entered in txtlink.</param>
''' <returns>
''' A DataTable with a single "link" column holding the generated local file
''' name (GUID plus ".jpg") of every downloaded image that passed the size filter.
''' </returns>
''' <remarks>
''' Side effects: allpics is replaced with a fresh table that receives one row
''' per successfully downloaded and opened image, whether or not it passes the
''' size filter. Images that fail to download or open are skipped.
''' </remarks>
Private Function Getpictures(ByVal content As String) As DataTable

    ' Images rejected when BOTH sides are below MinDimension, or EITHER side exceeds MaxDimension.
    Const MinDimension As Integer = 50
    Const MaxDimension As Integer = 500

    Dim pattern As String
    pattern = "(?<=src=(\x22|\x27))[^>]*[^/].(?:jpg|bmp|gif|JPG|BMP|GIF|PNG|png)[^\x22|\x27]*(?=\x22|\x27)"

    Dim matches As MatchCollection = Regex.Matches(content, pattern)

    ' Absolute URLs of every candidate image.
    Dim dt As New DataTable
    dt.Columns.Add("link", GetType(String))

    allpics = New DataTable
    allpics.Columns.Add("link", GetType(String))

    For Each m As Match In matches
        Dim src As String = m.Value
        Dim row As DataRow = dt.NewRow

        ' https links must be recognised as absolute too; otherwise they get the
        ' host prepended and the download fails silently.
        Dim isAbsolute As Boolean = src.Contains("http://") OrElse src.Contains("https://")

        If isAbsolute Then
            row.Item(0) = src
        Else
            Dim pageUrl As String = Me.txtlink.Text
            ' EndsWith is safe for an empty text box, unlike indexing the last character.
            Dim pageEndsWithSlash As Boolean = pageUrl.EndsWith("/", StringComparison.Ordinal)
            Dim srcStartsWithSlash As Boolean = (src.Chars(0) = "/"c)

            If pageEndsWithSlash AndAlso srcStartsWithSlash Then
                ' NOTE(review): the leading slash is dropped and no separator is added;
                ' this is only correct if GetHost returns a trailing slash for such
                ' input - confirm against GetHost.
                row.Item(0) = GetHost(pageUrl) & src.Remove(0, 1)
            ElseIf srcStartsWithSlash Then
                row.Item(0) = GetHost(pageUrl) & src
            Else
                row.Item(0) = GetHost(pageUrl) & "/" & src
            End If
        End If

        dt.Rows.Add(row)
    Next

    ' Images that passed the size filter, identified by local file name.
    Dim dt2 As New DataTable
    dt2.Columns.Add("link", GetType(String))

    For Each ro As DataRow In dt.Rows
        Try
            Dim id As String = Guid.NewGuid.ToString & ".jpg"
            Dim link As String = CStr(ro.Item("link"))
            Dim localPath As String = HttpContext.Current.Request.PhysicalApplicationPath & "DesktopModules\User Articles\tempimages\" & id

            My.Computer.Network.DownloadFile(link, localPath)

            ' The file is opened only to read its dimensions; Using releases the
            ' file handle even if reading them throws.
            Dim imgWidth As Integer
            Dim imgHeight As Integer
            Using img As System.Drawing.Image = System.Drawing.Image.FromFile(localPath)
                imgWidth = img.Width
                imgHeight = img.Height
            End Using

            Dim prow As DataRow = allpics.NewRow
            prow.Item("link") = id
            allpics.Rows.Add(prow)

            Dim tooSmall As Boolean = imgWidth < MinDimension AndAlso imgHeight < MinDimension
            Dim tooLarge As Boolean = imgWidth > MaxDimension OrElse imgHeight > MaxDimension

            If Not tooSmall AndAlso Not tooLarge Then
                Dim accepted As DataRow = dt2.NewRow
                accepted.Item("link") = id
                dt2.Rows.Add(accepted)
            End If

        Catch ex As Exception
            ' Best-effort: one unreachable or invalid image must not abort the whole
            ' scan, but leave a trace so failures can be diagnosed.
            System.Diagnostics.Trace.TraceWarning("Getpictures: skipped image '{0}': {1}", ro.Item("link"), ex.Message)
        End Try
    Next

    Return dt2

End Function

, ,

19 Comments