各位先進
最近初接觸爬蟲
想用 BeautifulSoup 抓類似下面網站的內容
http://propaccess.trueautomation.com/clientdb/?cid=81
但送出 POST 以後卻沒有回傳值，看起來是伺服器沒有收到 form data
想向各位求救 幫忙看看 code 哪裡需要做修正
目前想依照 Owner name 搭配 Advanced 裡面的顯示條件來爬蟲
import requests
from bs4 import BeautifulSoup
import re
from decimal import Decimal
import pandas as pd
import urllib
# Search page of the property-access site. The same URL is used both to
# download the search form (GET) and to submit it (POST).
index_url = 'http://propaccess.trueautomation.com/clientdb/?cid=81'
session = requests.Session()

# Download the form exactly once, through the session, so that the cookies
# stored in the session and the hidden tokens parsed below come from the
# same response. (Fetching the page a second time with urllib would yield
# tokens from an unrelated, cookie-less request.)
index_request = session.get(index_url)
index_request.raise_for_status()
soup = BeautifulSoup(index_request.text, 'lxml')

# Hidden inputs that have to be echoed back with the POST.
viewstate = soup.find_all(
    "input", {"type": "hidden", "name": "__VIEWSTATE"})
viewstategenerator = soup.find_all(
    "input", {"type": "hidden", "name": "__VIEWSTATEGENERATOR"})
eventvalidation = soup.find_all(
    "input", {"type": "hidden", "name": "__EVENTVALIDATION"})

# Fail with a readable message instead of an IndexError further down when
# the page did not contain one of the expected hidden fields.
for _field_name, _matches in (
        ("__VIEWSTATE", viewstate),
        ("__VIEWSTATEGENERATOR", viewstategenerator),
        ("__EVENTVALIDATION", eventvalidation)):
    if not _matches:
        raise RuntimeError(
            "hidden form field {!r} not found in {}".format(
                _field_name, index_url))

# Field names are given in their plain (decoded) form: requests
# percent-encodes the keys and values of `data=` itself, so passing
# pre-encoded names such as "...%3AownerName" or "...%253Asearch" would
# send "%253A..." / "%25253A..." on the wire and the server would not
# recognise the fields.
# NOTE(review): the names below are the decoded form of the original keys
# (with the stray trailing ':' removed from searchType); confirm them
# against the <input>/<select> `name` attributes in the page HTML.
formdata = {
    "propertySearchOptions:searchType": "Owner Name",
    "propertySearchOptions:ownerName": 'smith',
    "propertySearchOptions:taxyear": "2016",
    "propertySearchOptions:propertyType": 'Mineral',
    "propertySearchOptions:orderResultsBy": "Owner Name",
    "propertySearchOptions:recordsPerPage": "250",
    "__EVENTVALIDATION": eventvalidation[0]['value'],
    "__VIEWSTATE": viewstate[0]['value'],
    "__VIEWSTATEGENERATOR": viewstategenerator[0]['value'],
    "propertySearchOptions:search": "Search",
}

# Submit the search with the same session (same cookies) and parse the
# returned page.
response_post = session.post(index_url, data=formdata)
response_post.raise_for_status()
soup_post = BeautifulSoup(response_post.text, 'lxml')
感謝大神們