闲来无事,最近学了 Golang,顺手写了一个爬虫,用来抓取某网站泛域名(随机子域名)下各页面的标题,在此记录一下。功能:随机 User-Agent(未做代理 IP);自动创建 baidu.txt,并把抓取到的标题自动保存到 baidu.txt。

package main
import (
"fmt"
"io/ioutil"
"net/http"
"regexp"
"math/rand"
"time"
"os"
)
// Package-level state shared by the crawler.
var (
	// Worker bookkeeping and signalling channels. None of these five
	// identifiers is referenced by any function visible in this section.
	workerCount    = 0
	maxworKerCount = 32
	searchRequest  = make(chan string)
	workerDone     = make(chan bool)
	foundMatch     = make(chan bool)

	// cookie is a session-cookie string with placeholder values; no function
	// visible in this section attaches it to a request.
	cookie = "in_userid=1; in_username=xxxxxqq.com; in_userpassword=xxxxx; in_adminid=3; in_adminname=xxxxxqq.com; in_adminpassword=xxxxx; in_permission=1%2C2%2C3%2C4%2C5%2C6"

	// userAgentList is the pool GetRandomUserAgent draws from. All thirteen
	// entries are currently the same string.
	userAgentList = []string{
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
	}
)
// GetRandomUserAgent returns one entry of userAgentList chosen by a
// pseudo-random generator that is created and seeded from the current time
// on every call.
func GetRandomUserAgent() string {
	seed := time.Now().UnixNano()
	picker := rand.New(rand.NewSource(seed))
	idx := picker.Intn(len(userAgentList))
	return userAgentList[idx]
}
// httpClient is shared by every request so a timeout is always in force and
// the underlying transport is built once rather than once per call.
var httpClient = &http.Client{Timeout: 10 * time.Second}

// getUrlRespHtml issues a GET to a random five-letter subdomain of wholdlj.cn
// with a User-Agent from GetRandomUserAgent and returns the response body as
// a string.
//
// It returns "" when the request cannot be built or sent. If reading the body
// fails part-way, the error is printed and whatever was received is returned.
func getUrlRespHtml() string {
	// Seed the package-level generator used by randomString so the chosen
	// subdomains differ between runs.
	rand.Seed(time.Now().UnixNano())
	target := "http://" + randomString(5) + ".wholdlj.cn/"

	req, err := http.NewRequest("GET", target, nil)
	if err != nil {
		fmt.Println("获取地址错误", err)
		return "" // req is nil here; continuing would panic
	}
	req.Header.Add("User-Agent", GetRandomUserAgent())

	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Println("获取地址错误", err)
		return "" // resp is nil here; touching resp.Body would panic
	}
	// Register the close before reading so the body is released on every path.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// Best effort: a truncated page may still contain the <title>.
		fmt.Println("读取响应错误", err)
	}
	return string(body)
}
// titlePattern captures the text between <title> and </title>. It is compiled
// once here instead of on every Initialize call.
var titlePattern = regexp.MustCompile(`<title>(.*?)</title>`)

// Initialize fetches one page via getUrlRespHtml, extracts every <title>
// match, appends each title to baidu.txt terminated by CRLF, and echoes it to
// stdout. Failures to open, write or close the file are printed and end the
// call; nothing is returned.
func Initialize() {
	page := getUrlRespHtml()
	titles := titlePattern.FindAllStringSubmatch(page, -1)
	if len(titles) == 0 {
		return // nothing to record, so do not touch the file
	}

	// O_WRONLY is required: O_APPEND on its own opens the file read-only on
	// POSIX systems and every write then fails. O_CREATE keeps the call usable
	// even if the file has not been created beforehand.
	fo, err := os.OpenFile("baidu.txt", os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		fmt.Println("os.OpenFile err", err)
		return
	}
	// Close on every path; this function runs thousands of times and would
	// otherwise leak one descriptor per call.
	defer func() {
		if cerr := fo.Close(); cerr != nil {
			fmt.Println("fo.Close err", cerr)
		}
	}()

	for _, m := range titles {
		if _, err := fo.WriteString(m[1] + "\r\n"); err != nil {
			fmt.Println("fo.Write err", err)
			return
		}
		fmt.Printf("title:%s\r\n", m[1])
	}
}
// randomInt returns a pseudo-random int in the half-open range [min, max),
// drawn from the math/rand package-level generator. rand.Intn panics when
// max <= min.
func randomInt(min, max int) int {
	return min + rand.Intn(max-min)
}

// randomString returns n pseudo-random uppercase ASCII letters, 'A' through
// 'Z' inclusive. It returns "" when n <= 0.
func randomString(n int) string {
	if n <= 0 {
		return ""
	}
	buf := make([]byte, n)
	for i := range buf {
		// randomInt excludes its upper bound, so pass 'Z'+1 to make 'Z'
		// reachable.
		buf[i] = byte(randomInt('A', 'Z'+1))
	}
	return string(buf)
}
// mains runs Initialize 50000 times, one call after another.
func mains() {
	const rounds = 50000
	for done := 0; done != rounds; done++ {
		Initialize()
	}
}
func main() {
//创建文件
star:= time.Now()
fc,err:=os.Create("baidu.txt")
if err!=nil{
fmt.Println("os.Create err",err)
return
}
fc.Close()
mains()
fmt.Println(time.Since(star))
}转载请注明来自本站(66娱乐网)
66优乐网 » golang第一个爬虫实战