import math
import re
import urllib.request

from bs4 import BeautifulSoup

# Scrape movebubble.com rental listings for one price bucket and print
# title, price, latitude, longitude and a listing id as comma-separated rows.

# Price buckets: bucket i covers low + inc*i .. low + inc*i + inc (£ per month).
#high=1000
low = 600
inc = 20
i = 30  # bucket index; with low=600 and inc=20 this is the £1200-£1220 bucket

gap = ("https://properties.movebubble.com/find-homes-to-rent"
       "?min-price=" + str(low + inc * i) +
       "&max-price=" + str(low + inc * i + inc) +
       "&bedrooms=0%2C1")
print(gap)

# Read the total result count from the search page header.
page = BeautifulSoup(urllib.request.urlopen(gap).read(), 'html.parser')
totalItems = page.select_one("h1.total-results-text").text
totalItems = int(re.sub('[^0-9]', '', totalItems))
print(totalItems)

# The site lists 20 results per page; round up so the last partial page
# is not dropped.
maxPage = math.ceil(totalItems / 20)
pages = [str(p) for p in range(1, maxPage + 1)]

# Collect every property card across all result pages.
cards = []
for p in pages:
    soup = BeautifulSoup(urllib.request.urlopen(gap + "&page=" + p).read(), 'html.parser')
    cards.extend(soup.find_all("div", {"class": "property-card"}))

# Visit each listing page and extract the fields of interest.
for card in cards:
    url = card.find('div', {'class': 'property-card-slider rsDefault'}).get('property-link')
    page = BeautifulSoup(urllib.request.urlopen("https://properties.movebubble.com" + url).read(),
                         'html.parser')
    title = page.find('h1', {'class': 'strong-custom'}).text.replace(",", "").replace(" ", "")
    price = page.find('strong', {'class': 'property-page__price--value'}).text.replace(" ", "")
    price = re.sub('[,£]', '', price).strip()
    # Latitude/longitude are embedded in the Google Maps iframe URL (…=<lat>,<lon>&…).
    gmap = page.find('iframe', {'class': 'property-page__map-frame'}).get('src')
    lat = gmap.split(",", 1)[0].split('=', 1)[1]
    lon = gmap.split(",", 1)[1].split('&', 1)[0]
    # The last 7 characters of the listing path serve as an id.
    print(title + ',' + price + ',' + lat + ',' + lon + ',' + url[-7:])
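
# --- Optional extension (sketch) ---------------------------------------
# The script above scrapes a single hard-coded price bucket (i=30). A natural
# next step is to sweep every £20 bucket from `low` up to the commented-out
# `high=1000` and record what each bucket returns. The sketch below is an
# assumption about how that sweep might look, not part of the original script:
# `bucket_url`, `sweep_buckets` and the output file "listings.csv" are made-up
# names, and it reuses the page structure assumed above (re, urllib.request
# and BeautifulSoup are already imported at the top of the file).

import csv


def bucket_url(min_price, max_price):
    # Hypothetical helper: build the search URL for one price bucket.
    return ("https://properties.movebubble.com/find-homes-to-rent"
            "?min-price=" + str(min_price) +
            "&max-price=" + str(max_price) +
            "&bedrooms=0%2C1")


def sweep_buckets(low=600, high=1000, inc=20, out_path="listings.csv"):
    # Walk every price bucket and record the bucket bounds together with the
    # number of listings the site reports for it.
    with open(out_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["min_price", "max_price", "total_results"])
        for min_price in range(low, high, inc):
            url = bucket_url(min_price, min_price + inc)
            page = BeautifulSoup(urllib.request.urlopen(url).read(), "html.parser")
            header = page.select_one("h1.total-results-text")
            total = int(re.sub("[^0-9]", "", header.text)) if header else 0
            writer.writerow([min_price, min_price + inc, total])


# sweep_buckets()  # uncomment to run the full price sweep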