[問題] 請教一個程式作業

作者: jk808631 (耶耶)   2014-06-30 16:27:37
這是期末考的補救機會作業
老師要我完成的是剩下的部份,底下說明以後的部分
import urllib.request
from bs4 import BeautifulSoup
def getText(url, encoding='utf-8'):
    """Download a web page and return its visible text, one chunk per line.

    Args:
        url: Address of the page to fetch.
        encoding: Encoding hint passed to BeautifulSoup as ``from_encoding``.

    Returns:
        A string in which ``<script>`` and ``<style>`` content has been
        removed, every line has been stripped and split on single spaces,
        empty chunks have been dropped, and the remaining chunks are joined
        with newlines.
    """
    # Example page:
    # 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/lw1939-pale-in-comparison/1825297.html'

    # Read the body inside a ``with`` so the HTTP response is always closed,
    # even if reading raises.
    with urllib.request.urlopen(urllib.request.Request(url)) as response:
        html = response.read()

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed, so results can differ between machines.
    soup = BeautifulSoup(html, 'html.parser', from_encoding=encoding)

    # Remove script and style elements so their source does not end up in
    # the extracted text.
    for tag in soup(["script", "style"]):
        tag.extract()

    text = soup.get_text()

    # Strip leading and trailing whitespace from every line.
    lines = (line.strip() for line in text.splitlines())
    # Break each line into chunks at single spaces.
    # NOTE(review): the accompanying comment in the original talks about
    # splitting "multi-headlines"; a two-space separator may have been
    # intended and lost when the code was pasted -- confirm with the author.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    # Drop empty chunks and put each remaining chunk on its own line.
    return '\n'.join(chunk for chunk in chunks if chunk)
import re
def getVOA(url):
    """Return the article portion of a VOA page as a single string.

    The page text is obtained from ``getText(url)`` and scanned line by line:

    * a line starting with '列印' arms the scan the first time it is seen,
      and switches collection off when collection is currently on;
    * once armed, a line starting with '美國之音' switches collection on;
    * every other line seen while collection is on is appended to the result.

    Marker lines themselves are never included. The collected lines are
    concatenated with no separator between them.
    """
    print_marker = re.compile(r'列印')
    voa_marker = re.compile(r'美國之音')
    title_marker = re.compile(r'學個詞-\d+-\w+')

    armed = False        # a '列印' line has been seen
    in_article = False   # between a '美國之音' line and the next '列印' line
    # NOTE(review): this flag starts as True and is only ever set to True, so
    # the title match below never changes the outcome. Starting it as False
    # (collect from the title line onward) may have been intended -- confirm.
    collecting = True

    pieces = []
    for row in getText(url).split('\n'):
        if print_marker.match(row):
            if in_article:
                in_article = False
            else:
                armed = True
        elif armed and voa_marker.match(row):
            in_article = True
        else:
            if in_article and title_marker.match(row):
                collecting = True
            if in_article and collecting:
                pieces.append(row)
    return ''.join(pieces)
# Pages to download. The last path component of each address (for example
# '1943689.html') carries the numeric id of the page.
urls = [
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTktc3RpY2tlci1zaG9jaw~~/1943689.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTgtZ3JhY2UtcGVyaW9k/1943688.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTctY2l2aWwtd2Fy/1943687.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTYtZGlzcGFyYWdl/1943685.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTUtcHJvaGliaXQ~/1939100.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTQtc3dpdGNo/1939098.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTMtdm9pY2U~/1939094.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTItbWFzY290/1939093.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTEtZXhjaGFuZ2U~/1939092.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTAtYnJlYWR3aW5uZXI~/1935520.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDktYW5vbnltb3Vz/1935516.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDgtZHJhZnQ~/1935513.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDctaWRlbnRpZnk~/1935511.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDYtbmF0aW9ud2lkZQ~~/1935509.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTMzNy1jaGFyaXR5LQ~~/1933985.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDUtY29udHJpYnV0aW9u/1928911.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDQtY29udGFnaW91cw~~/1928909.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDMtYXNzZXNz/1928907.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDItZ3JhZmZpdGk~/1928906.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDEtZnVuZGluZw~~/1928904.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDAtYWNjb21wbGlzaG1lbnQ~/1925331.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzktcHVibGljLXRyYW5zaXQ~/1925330.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzgtZGF0YWJhc2U~/1925329.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzctaGVhcmluZw~~/1925327.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzYtcmFudA~~/1925325.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/media/video/1936377.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzUtcHJvZm91bmQ~/1919322.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzQtcGxhbi1i/1919321.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzMtdG94aWM~/1919314.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzItb24tdGhlLWJyaW5rLW9m/1919312.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzEtY29tcGVs/1919311.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzAtbWF4LW91dA~~/1914530.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjktc2NyZWVuLXRpbWU~/1914527.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjgtdW5leHBlY3RlZA~~/1914522.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjctZGl2ZXJzZQ~~/1914519.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjYtd2lkZS1yYW5naW5n/1914515.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjUtYXQtbm8tY2hhcmdl/1914512.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjQtcmVoYWItY2VudGVy/1914508.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjMtY29tcGxhaW50/1914506.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjItc3VzcGljaW9u/1914504.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjEtb3V0bGF3/1914503.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/media/video/1936263.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjAtbGV0aGFs/1904640.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTktcG9pc2Vk/1904636.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTgtbWFyaXRpbWU~/1904632.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTctc3VzdGFpbmFiaWxpdHk~/1904630.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base48-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTYtYmFy/1904626.html',
    'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTUtc2tlbGV0b24~/1899500.html',
]
#撰寫迴圈將urls中的每一個連結的文字內容個別存入一個文字檔
#文字檔檔名以連結的檔名為檔名, 附檔名則將html改成txt.
#例如urls[0]的檔名為1943689, 故存成的文字檔必須是1943689.txt
#以下示範程式可以顯示 1943689的文字內容, 但你要寫迴圈來批次
#讀取與寫入內容. 完成後請email給我程式檔及所擷取的文字檔.(可以用zip壓縮)
想請教怎麼把文字檔檔名儲存成每個網址後面的數字
迴圈的部分也不太懂到底該怎麼寫....
各位可以救救我嗎
作者: uranusjr (←這人是超級笨蛋)   2014-06-30 16:43:00
已經是補救機會還只能問人, 我看還是明年再來對你比較好
作者: ck574b027 (荒圍!定厝!賊!妹!)   2014-06-30 21:47:00
google "python 另存一個文字檔" 很困難嗎?
作者: yauhh (小y寶貝)   2014-06-30 21:52:00
for name in urls: ... 那樣可以,只要你假定網址都那個規格
作者: goldflower (金色小黃花)   2014-07-01 17:00:00
你在迴圈裡用 open 應該就能直接建立新的文件,然後再寫進去。比如 f = open('hello.txt','w') 會建立一個叫 hello.txt 的檔。菇狗一下 "文件讀寫 python" 應該會有很多資料

Links booklink

Contact Us: admin [ a t ] ucptt.com