# Python version; requires PIL and pycurl. Rough around the edges, but usable.
#!coding=utf-8
import calendar
import math
import operator
import os
import time
from StringIO import StringIO

import pycurl
from PIL import Image
def _histogram_rms(hist_a, hist_b):
    """Return the root-mean-square difference between two histograms.

    Both arguments are flat sequences of bin counts, as returned by
    ``Image.histogram()``.

    Raises:
        ValueError: if the histograms are empty or differ in length
            (for example because the two images have different modes),
            in which case they cannot be compared bin by bin.
    """
    if not hist_a or len(hist_a) != len(hist_b):
        raise ValueError(
            "cannot compare histograms of length %d and %d"
            % (len(hist_a), len(hist_b)))
    squared_error = sum((a - b) ** 2 for a, b in zip(hist_a, hist_b))
    # float() keeps this a true division on Python 2 as well.
    return math.sqrt(float(squared_error) / len(hist_a))


def _fetch(url):
    """Download *url* with pycurl and return the body as a rewound buffer.

    The curl handle is always closed, even when the transfer fails.

    Raises:
        pycurl.error: if the transfer fails.
    """
    body = StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, url)
        curl.setopt(curl.WRITEFUNCTION, body.write)
        curl.perform()
    finally:
        curl.close()
    body.seek(0)
    return body


def main():
    """Download the picture for every date and save the real ones.

    For each calendar date from 1951-09-16 through 2014-12-31 the image
    is requested from the remote service, compared against the local
    placeholder ``no.jpg`` via histogram RMS, and written to the ``pic``
    directory next to this script when it differs from the placeholder.
    Dates whose file already exists are skipped, so the script can be
    re-run to resume an interrupted crawl.
    """
    pic_url = r"https://event.franklin.com.tw/C2014_11_TGF/showimg.aspx?date="
    # abspath() guards against dirname(__file__) being "" when the script
    # is started from its own directory, which used to yield "/pic".
    base_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(base_dir, "pic")
    no_hist = Image.open(os.path.join(base_dir, "no.jpg")).histogram()

    if not os.path.isdir(path):
        os.makedirs(path)
        print("目錄 "+path+" 不存在,產生新目錄.")
    print("圖片將儲存於 "+path+" 目錄.")

    # Original author's note: no data exists before 1951-09-16.
    first_available = (1951, 9, 16)
    # Original author's note: a larger RMS means a larger difference, and
    # a picture identical to the placeholder comes out close to 860.
    placeholder_threshold = 870

    for year in range(1951, 2015):
        for month in range(1, 13):
            if (year, month) < first_available[:2]:
                continue
            print("開始擷取 "+("%04d" % year)+"年"+("%02d" % month)+"月.")
            # Only iterate over days that exist in this month.
            days_in_month = calendar.monthrange(year, month)[1]
            for day in range(1, days_in_month + 1):
                if (year, month, day) < first_available:
                    continue
                date = "%04d%02d%02d" % (year, month, day)
                savefile = os.path.join(path, date + ".jpg")

                # Skip pictures that were already downloaded.
                if os.path.isfile(savefile):
                    print(savefile+"已存在.")
                    continue

                try:
                    body = _fetch(pic_url + date)
                except pycurl.error as err:
                    print(date+" 下載失敗: "+repr(err))
                    continue

                try:
                    im = Image.open(body)
                    rms = _histogram_rms(no_hist, im.histogram())
                    if rms <= placeholder_threshold:
                        # Same as the placeholder: nothing to save.
                        continue
                    im.save(savefile, 'JPEG')
                except Exception as err:
                    # Deliberately broad per-picture boundary: PIL raises
                    # several exception types for bad data, and one broken
                    # picture must not abort the crawl. Unlike a bare
                    # ``except`` this still lets Ctrl-C stop the script.
                    print(date+" 處理失敗: "+repr(err))
                    continue

                print(time.strftime("%Y-%m-%d %H:%M:%S",
                                    time.localtime())+" 儲存 "+savefile+" 成功.")
# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()