# Gather the text of every <a> link found in the second <li> of the
# "sightbase" list inside this entry's "sightdetail" container.
# `div` is expected to be supplied by the surrounding code (one scraped entry).
dd = div.find("div", attrs={"class": "sightdetail"})
lis = dd.find("ul", attrs={"class": "sightbase"}).find_all("li")
# The second <li> may be absent; in that case the result stays an empty list.
sSource = [link.text for link in lis[1].find_all("a")] if len(lis) > 1 else []
5、爬取景区时间与级别
景区的时间与级别是在
self.con.close()
def insertDB(self, sName, sType, sSource, sLevel, sTime, sHotel):
    """Insert one row into the ``scenes`` table.

    ``sType``, ``sSource`` and ``sHotel`` are serialised with ``json.dumps``
    before being bound to the statement; ``sName``, ``sLevel`` and ``sTime``
    are bound unchanged. The statement is executed on ``self.cursor`` and
    no commit is issued here.

    The insert is best-effort: any failure (serialisation or execution) is
    printed and then suppressed, so the caller is never interrupted.
    """
    sql = (
        "insert into scenes (sName,sType,sSource,sLevel,sTime,sHotel) "
        "values (?,?,?,?,?,?)"
    )
    try:
        # Serialisation stays inside the try so a non-JSON-able value is
        # handled the same way as a database error.
        row = [
            sName,
            json.dumps(sType),
            json.dumps(sSource),
            sLevel,
            sTime,
            json.dumps(sHotel),
        ]
        self.cursor.execute(sql, row)
    except Exception as err:
        # Report instead of silently discarding; `Exception` (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        print(err)
def spider(self,url):
try:
resp=urllib.request.urlopen(url)
html=resp.read().decode()
soup=BeautifulSoup(html,"lxml")
divs=soup.find("div",attrs={"class":"sightlist"}).find_all("div",attrs={"class":"sightshow"})
for div in divs:
dd=div.find("div",attrs={"class":"sightdetail"})
sName=dd.find("h4").find("a").text
lis = dd.find("ul", attrs={"class": "sightbase"}).find_all("li")
sType = []
if len(lis) > 0:
for link in lis[0].find_all("a"):
sType.append(link.text)
sSource=[]
if len(lis)>1:
for link in lis[1].find_all("a"):
sSource.append(link.text)
if len(lis)>2:
sLevel=lis[2].find("span").find("a").text
sTime = lis[2].find("a",recursive=False).text
else:
sLevel=""
sTime=""
lis = dd.find("ul", attrs={"class": "sighthotel"}).find_all("li")
sHotel = []
for li in lis:
h = {}
h["name"] = li.find("a").text
h["price"] = li.find("span").text
sHotel.append(h)
8