"""Scrape loan listings from the Crowd2Fund exchange and print them as CSV.

For each listing row the script prints one comma-separated record:
company, payments made, loan term, A-APR, B-APR, earnings, price, detail URL, date
"""

# NOTE(review): the pyvirtualdisplay/selenium imports are unused by this
# script; kept in case another workflow relies on importing this module.
from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import math
import re
import urllib.request

from bs4 import BeautifulSoup
from dateutil.parser import parse

BASE_URL = "https://www.crowd2fund.com/exchange"
ITEMS_PER_PAGE = 15  # listings shown per exchange page


def _fetch(url):
    """Download *url* and return it parsed as a BeautifulSoup document."""
    return BeautifulSoup(urllib.request.urlopen(url).read(), 'html.parser')


def _listing_rows():
    """Yield every listing <tr> across all pages of the exchange."""
    first_page = _fetch(BASE_URL)
    total_items = int(first_page.select_one("span[class=total-items]").text)
    # BUG FIX: the original used int(total / 15), which floors and silently
    # drops the final partial page (e.g. 16 items -> only 1 page). Round up.
    last_page = math.ceil(total_items / ITEMS_PER_PAGE)
    for page_no in range(1, last_page + 1):
        page = _fetch(BASE_URL + "/" + str(page_no))
        body = page.find("tbody", {"class": "ajax-content"})
        # Rows are already parsed Tag objects; no need to re-parse via
        # BeautifulSoup(str(...)) as the original did.
        for row in body.find_all("tr", recursive=False):
            yield row


def _parse_row(row):
    """Extract one CSV record (list of strings) from a listing <tr>."""
    tds = row.find_all('td')  # hoisted: the original re-ran findAll per field
    company = tds[1].find('a').text
    date = parse(tds[3].text).strftime('%m-%d-%Y')
    # The span title reads like "NN / MM" (payments made / loan term).
    # BUG FIX: the original replaced '/' with '0' and split on the first '0',
    # which mis-parses any payment count containing a zero ("10 / 60" would
    # yield payments="1", term="060"). Match the two numbers directly.
    title = tds[6].find('span').get('title')
    match = re.search(r'(\d+)\s*/\s*(\d+)', title)
    payments, term = match.group(1), match.group(2)
    a_apr = re.sub('[!%£]', '', tds[7].text.replace(" ", "")).strip('\n')
    b_apr = re.sub('[!%£]', '', tds[8].text.replace(" ", "")).strip('\n')
    earn = re.sub('[!%£,]', '', tds[9].text.replace(" ", "")).strip('\n')
    price = re.sub('[!%£,]', '', tds[10].text.replace(" ", "")).strip('\n')
    num = tds[10].find('a').get('data-url')
    return [company, payments, term, a_apr, b_apr, earn, price, num, date]


def main():
    """Print one comma-separated record per exchange listing."""
    for row in _listing_rows():
        print(','.join(_parse_row(row)))


if __name__ == "__main__":
    main()