"""Scrape SpareRoom advanced-search results into a CSV file.

The script starts a headless display and a Chrome browser, fills in the
SpareRoom advanced search form with a fixed set of criteria, walks through
the result pages and writes one ``availability,url`` row per listing whose
availability date is later than the cutoff date.
"""
from pyvirtualdisplay import Display
from selenium import webdriver
from datetime import datetime
import time
import csv

SEARCH_URL = 'http://www.spareroom.co.uk/flatshare/search.pl?searchtype=advanced'
OUTPUT_PATH = '/home/martin/www/text.txt'

# The pagination arithmetic assumes ten listings per result page.
RESULTS_PER_PAGE = 10

# Availability is shown as e.g. "14 Sep" (no year), so both the listing date
# and the cutoff are parsed with the same year-less format and compared on
# month/day only.
# NOTE(review): dates early in the following year will compare as earlier
# than the cutoff and be dropped -- confirm this is acceptable.
DATE_FORMAT = '%d %b'
CUTOFF_DATE = datetime.strptime('2 Sep', DATE_FORMAT)


def page_count(total_results, per_page=RESULTS_PER_PAGE):
    """Return the number of result pages needed for *total_results*.

    Rounds up, so a partially filled last page is counted.
    """
    return (total_results + per_page - 1) // per_page


def availability_text(listing_text):
    """Return the text after "Available " in a listing, or None.

    Only the remainder of the line that starts with "Available " is
    returned (for example ``"Now"`` or ``"14 Sep"``). ``None`` is returned
    when the listing text has no such line.
    """
    pieces = listing_text.split("\nAvailable ", 1)
    if len(pieces) < 2:
        return None
    return pieces[1].split('\n', 1)[0]


def fill_search_form(browser):
    """Enter the fixed search criteria and submit the advanced search form."""

    def click(xpath):
        browser.find_element_by_xpath(xpath).click()

    def type_into(field_name, text):
        browser.find_element_by_name(field_name).send_keys(text)

    # Optional location filters, currently disabled:
    # click("//input[@name='location_type' and @value='zone']")
    # click("//input[@name='location_type' and @value='commuter']")
    # click("//select[@name='max_commute_time']/option[@value='10']")
    # click("//select[@name='station_id']/option[@value='FARRINGDON']")
    type_into("search", 'Mile End, Bethnal Green, London Fields, Broadway Market')
    type_into("min_rent", '500')
    type_into("max_rent", '900')
    click("//input[@name='per' and @value='pcm']")
    click("//input[@name='showme_1beds']")
    click("//input[@name='showme_buddyup_properties']")
    click("//input[@name='living_room']")
    click("//input[@name='room_types' and @value='double']")
    click("//select[@name='min_term']/option[@value='12']")
    click("//select[@name='max_term']/option[@value='0']")
    click("//input[@name='available_search']")
    click("//select[@name='day_avail']/option[@value='01']")
    click("//select[@name='mon_avail']/option[@value='10']")
    click("//select[@name='year_avail']/option[@value='2016']")
    click("//input[@name='genderfilter' and @value='mixed']")
    type_into("min_age_req", '20')
    type_into("max_age_req", '30')
    # Optional bedroom-count filters, currently disabled:
    # click("//select[@name='min_beds']/option[@value='3']")
    # click("//select[@name='max_beds']/option[@value='6']")
    click("//input[@name='landlord' and @value='live_out']")
    click("//input[@name='photoadsonly']")
    click("//input[@name='days_of_wk_available' and @value='7 days a week']")
    click("//input[@name='posted_by' and @value='private_landlords']")
    browser.find_element_by_name("submit").click()


def read_total_results(browser):
    """Return the result count shown in the pagination header.

    Assumes the second <strong> inside p.navcurrent holds the total number
    of results (the original code divided this value by ten to get a page
    count) -- TODO confirm against the live page.
    """
    text = browser.find_element_by_xpath("//p[@class='navcurrent']/strong[2]").text
    # Tolerate thousands separators such as "1,234".
    return int(float(text.replace(',', '')))


def scrape_listings(browser):
    """Collect ``(availability, url)`` pairs from every result page.

    A listing is kept only when its <li> has an empty class attribute, its
    availability is a date (not "Now") and that date is after CUTOFF_DATE.
    """
    rows = []
    pages = page_count(read_total_results(browser))
    for page in range(1, pages + 1):
        # Take whatever items the page actually has instead of assuming a
        # fixed count, so a short last page does not raise.
        items = browser.find_elements_by_xpath("//ul[@class='listing-results']/li")
        for item in items:
            # Items carrying a class are skipped, as in the original script;
            # presumably these are adverts or featured entries -- verify.
            if item.get_attribute('class') != '':
                continue
            available = availability_text(item.text)
            if available is None or available == 'Now':
                continue
            try:
                available_on = datetime.strptime(available, DATE_FORMAT)
            except ValueError:
                print("Skipping listing with unparseable availability: %r" % (available,))
                continue
            if available_on <= CUTOFF_DATE:
                continue
            link = item.find_element_by_xpath("./article/header/a").get_attribute('href')
            rows.append((available, link))

        if page == pages:
            break  # nothing to navigate to after the last page
        next_links = browser.find_elements_by_xpath("//ul[@class='navnext']/li/strong/a")
        if not next_links:
            break  # the site offers no further page; stop early
        next_links[0].click()
    return rows


display = Display(visible=0, size=(800, 600))
display.start()
try:
    browser = webdriver.Chrome()
    try:
        browser.get(SEARCH_URL)
        fill_search_form(browser)
        print(browser.find_element_by_id('results_header').text)
        print(browser.current_url)
        listings = scrape_listings(browser)
    finally:
        # Always shut the browser down, even when scraping fails part-way.
        browser.quit()
finally:
    display.stop()

with open(OUTPUT_PATH, 'w') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerows(listings)