一个asp采集程序类

作者在 2007-06-07 17:01:00 发布以下内容

 
一个asp采集程序类
 
'==================================================
'函数名:GetHttpPage
'作 用:获取网页源码
'参 数:HttpUrl ------网页地址
'==================================================
GetHttpPage(HttpUrl)
If IsNull(HttpUrl)=True Or Len(HttpUrl)<18 Or HttpUrl="$False$" Then
GetHttpPage="$False$"
Exit
End If
Dim Http
Set Http=server.createobject("MSXML2.XMLHTTP")
Http.open "GET",HttpUrl,False
Http.Send()
If Http.Readystate<>4 then
Set Http=Nothing
GetHttpPage="$False$"
Exit
End if
GetHTTPPage=bytesToBSTR(Http.responseBody,"GB2312")
Set Http=Nothing
If Err.number<>0 then
Err.Clear
End If
End
'==================================================
'函数名:BytesToBstr
'作 用:将获取的源码转换为中文
'参 数:Body ------要转换的变量
'参 数:Cset ------要转换的类型
'==================================================
BytesToBstr(Body,Cset)
Dim Objstream
Set Objstream = Server.CreateObject("adodb.stream")
objstream.Type = 1
objstream.Mode =3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End
'==================================================
'函数名:UrlEncoding
'作 用:转换编码
'==================================================
UrlEncoding(DataStr)
Dim StrReturn,Si,ThisChr,InnerCode,Hight8,Low8
StrReturn = ""
For Si = 1 To Len(DataStr)
ThisChr = Mid(DataStr,Si,1)
If Abs(Asc(ThisChr)) < &HFF Then
StrReturn = StrReturn & ThisChr
Else
InnerCode = Asc(ThisChr)
If InnerCode < 0 Then
InnerCode = InnerCode + &H10000
End If
Hight8 = (InnerCode And &HFF00)\ &HFF
Low8 = InnerCode And &HFF
StrReturn = StrReturn & "%" & Hex(Hight8) & "%" & Hex(Low8)
End If
Next
UrlEncoding = StrReturn
End
'==================================================
'函数名:GetBody
'作 用:截取字符串
'参 数:ConStr ------将要截取的字符串
'参 数:StartStr ------开始字符串
'参 数:OverStr ------结束字符串
'参 数:IncluL ------是否包含StartStr
'参 数:IncluR ------是否包含OverStr
'==================================================
GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True Or StartStr="" or IsNull(StartStr)=True Or OverStr="" or IsNull(OverStr)=True Then
GetBody="$False$"
Exit
End If
Dim ConStrTemp
Dim Start,Over
ConStrTemp=Lcase(ConStr)
StartStr=Lcase(StartStr)
OverStr=Lcase(OverStr)
Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare)
If Start<=0 then
GetBody="$False$"
Exit
Else
If IncluL=False Then
Start=Start+LenB(StartStr)
End If
End If
Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare)
If Over<=0 Or Over<=Start then
GetBody="$False$"
Exit
Else
If IncluR=True Then
Over=Over+LenB(OverStr)
End If
End If
GetBody=MidB(ConStr,Start,Over-Start)
End
'==================================================
'函数名:GetArray
'作 用:提取链接地址,以$Array$分隔
'参 数:ConStr ------提取地址的原字符
'参 数:StartStr ------开始字符串
'参 数:OverStr ------结束字符串
'参 数:IncluL ------是否包含StartStr
'参 数:IncluR ------是否包含OverStr
'==================================================
GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
If ConStr="$False$" or ConStr="" Or IsNull(ConStr)=True or StartStr="" Or OverStr="" or IsNull(StartStr)=True Or IsNull(OverStr)=True Then
GetArray="$False$"
Exit
End If
Dim TempStr,TempStr2,objRegExp,Matches,Match
TempStr=""
Set objRegExp = New Regexp
objRegExp.IgnoreCase = True
objRegExp.Global = True
objRegExp.Pattern = "("&StartStr&").+?("&OverStr&")"
Set Matches =objRegExp.Execute(ConStr)
For Each Match in Matches
TempStr=TempStr

默认分类 | 阅读 4872 次
文章评论,共0条
游客请输入验证码
浏览109214次