on May 3rd, 2009 — diggfoto hack
diggfoto 上的图片还蛮不错的,但是要一张一张去点确实有点不人道(总在期望下一张更精彩?),下面的程序可以帮忙把它的图片都下回来,事先最好准备一个 diggfoto 文件夹,嘿嘿。
#! /usr/bin/env python
#coding=utf-8
import os
import sys
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup


def download(url='http://diggfoto.com/'):
    """Walk diggfoto.com page by page, saving each page's photo locally.

    Starting at *url*, fetch the page, take the first link found inside the
    "entry" div, save the image that link wraps into the ``diggfoto``
    directory, then follow the link to the next page.  The walk stops when a
    page cannot be fetched or parsed, when the next link ends in ``tags/``,
    ``archive/`` or ``about/``, or when a link leads back to a page that was
    already visited.

    Targets Python 2 (``urllib2`` and BeautifulSoup 3).  The syntax is kept
    valid for Python 2.6 and later (``except ... as``, parenthesised print).

    Args:
        url: Address of the first page to process.

    Returns:
        None.

    Raises:
        SystemExit: if the ``diggfoto`` output directory does not exist.
    """
    stop_suffixes = ('tags/', 'archive/', 'about/')
    visited = set()

    # Iterate rather than recurse once per page: a long gallery would
    # otherwise exhaust the interpreter's recursion limit.  The visited set
    # keeps a link cycle from looping forever.
    while url and url not in visited:
        visited.add(url)

        try:
            response = urllib2.urlopen(url)
            try:
                page = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(page)
            link = soup.find('div', 'entry').find('a')
            next_link = link['href']
            img = link.find('img')
            image_url = img['src']
            dot_index = image_url.rindex('.')
        except (AttributeError, IndexError, UnicodeDecodeError) as e:
            print(e)
            return
        except Exception as e:
            # Network errors, missing attributes, a URL without an extension.
            # KeyboardInterrupt is deliberately not caught here.
            print('Oops: %r' % (e,))
            return

        # Prefer the image's alt text as the file name; a missing alt
        # attribute is treated the same as an empty one.
        try:
            stem = img['alt']
        except KeyError:
            stem = ''
        if not stem:
            # Fall back to the last path segment of the page URL, with or
            # without a trailing slash.
            stem = url.rstrip('/').rsplit('/', 1)[-1]

        if not os.path.isdir('diggfoto'):
            print('Is there a directory named diggfoto?')
            sys.exit(0)

        # Keep the dot plus at most three characters of extension.
        name = os.path.join('diggfoto',
                            stem + image_url[dot_index:dot_index + 4])
        if not os.path.exists(name):
            try:
                print('Downloading Image ==> %s' % name)
            except UnicodeError:
                # The console cannot encode the file name.
                print('Downloading Image ...')
            try:
                urllib.urlretrieve(image_url, name)
            except Exception as e:
                # Best effort: skip this image and carry on with the next
                # page, exactly once.
                print('Download failed: %r' % (e,))

        if not next_link:
            return
        print('Next url ==> %s' % next_link)
        if next_link.endswith(stop_suffixes):
            print('Complete')
            return
        url = next_link


if __name__ == "__main__":
    sys.exit(download())
invalid syntax 在 AttributeError, e 这个逗号处
我是菜鸟,完全不懂
有下载 BeautifulSoup 吗?
地址在:http://www.crummy.com/software/BeautifulSoup/
解压出来的 py 文件放到程序同一目录下。
或者是复制代码的时候弄错了?我跑了一下,没问题。作了点小的改进,你再试试看。
不懂什么代码也,怎么用法啊!!!
无法运行啊,大哥可不可以教一下小弟啊?
哎呀,自己解决了…谢谢,谢谢
Hi~还是没有用成功,总是显示invalid syntax 在 AttributeError, e 我下载了beautiful soup,用windows下IDLE的run module运行,还是不行,估计是我的方法有错,请再指点一下吧。
复制下代码比如保存成 diggfoto.py 文件,随便在哪儿,和 beautifulsoup 放一块儿就行,建一个叫做 diggfoto 的文件夹用来放下载的照片,然后在终端敲入 python diggfoto.py 运行。
ActivePython-2.6.3.7-win32-x86.msi
我装了这个就OK啦,
哥们,good job
我在你code上做了点改进,这样就可以‘断点续传’了:)
#! /usr/bin/env python
#coding=utf-8
import os
import sys
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
def download(url):
    """Walk diggfoto.com page by page, saving photos and a resume checkpoint.

    Starting at *url*, fetch the page, take the first link found inside the
    "entry" div, save the image that link wraps into the ``diggfoto``
    directory, then follow the link to the next page.  Before moving on, the
    current page's URL is written to ``diggfoto.dat`` so a later run can
    resume from it.  The walk stops when a page cannot be fetched or parsed,
    when the next link ends in ``tags/``, ``archive/`` or ``about/``, or when
    a link leads back to a page that was already visited.

    Targets Python 2 (``urllib2`` and BeautifulSoup 3).  The syntax is kept
    valid for Python 2.6 and later (``except ... as``, ``with``).

    Args:
        url: Address of the first page to process.

    Returns:
        None.

    Raises:
        SystemExit: if the ``diggfoto`` output directory does not exist.
    """
    stop_suffixes = ('tags/', 'archive/', 'about/')
    visited = set()

    # Iterate rather than recurse once per page: a long gallery would
    # otherwise exhaust the interpreter's recursion limit.  The visited set
    # keeps a link cycle from looping forever.
    while url and url not in visited:
        visited.add(url)

        try:
            response = urllib2.urlopen(url)
            try:
                page = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(page)
            link = soup.find('div', 'entry').find('a')
            next_link = link['href']
            img = link.find('img')
            image_url = img['src']
            dot_index = image_url.rindex('.')
        except (AttributeError, IndexError, UnicodeDecodeError) as e:
            print(e)
            return
        except Exception as e:
            # Network errors, missing attributes, a URL without an extension.
            # KeyboardInterrupt is deliberately not caught here.
            print('Oops: %r' % (e,))
            return

        # Prefer the image's alt text as the file name; a missing alt
        # attribute is treated the same as an empty one.
        try:
            stem = img['alt']
        except KeyError:
            stem = ''
        if not stem:
            # Fall back to the last path segment of the page URL, with or
            # without a trailing slash.
            stem = url.rstrip('/').rsplit('/', 1)[-1]

        if not os.path.isdir('diggfoto'):
            print('Is there a directory named diggfoto?')
            sys.exit(0)

        # Keep the dot plus at most three characters of extension.
        name = os.path.join('diggfoto',
                            stem + image_url[dot_index:dot_index + 4])
        if not os.path.exists(name):
            try:
                print('Downloading Image ==> %s' % name)
            except UnicodeError:
                # The console cannot encode the file name.
                print('Downloading Image ...')
            try:
                urllib.urlretrieve(image_url, name)
            except Exception as e:
                # Best effort: skip this image and carry on with the next
                # page, exactly once.
                print('Download failed: %r' % (e,))

        if not next_link:
            return
        print('Next url ==> %s' % next_link)
        if next_link.endswith(stop_suffixes):
            print('Complete')
            return

        # Checkpoint the page just processed so an interrupted run can
        # restart from it.
        with open('diggfoto.dat', 'w') as url_file:
            url_file.write(url)
        url = next_link
if __name__ == "__main__":
    # Resume from the page recorded in diggfoto.dat by a previous run.
    try:
        with open('diggfoto.dat', 'r') as url_file:
            url = url_file.read().strip()
    except IOError:
        # No readable checkpoint yet.
        url = ''
    # A missing or empty checkpoint means starting from the front page.
    sys.exit(download(url or 'http://diggfoto.com'))
sorry,格式有点乱
#! /usr/bin/env python
#coding=utf-8
import os
import sys
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
def download(url):
    """Walk diggfoto.com page by page, saving photos and a resume checkpoint.

    Starting at *url*, fetch the page, take the first link found inside the
    "entry" div, save the image that link wraps into the ``diggfoto``
    directory, then follow the link to the next page.  Before moving on, the
    current page's URL is written to ``diggfoto.dat`` so a later run can
    resume from it.  The walk stops when a page cannot be fetched or parsed,
    when the next link ends in ``tags/``, ``archive/`` or ``about/``, or when
    a link leads back to a page that was already visited.

    Targets Python 2 (``urllib2`` and BeautifulSoup 3).  The syntax is kept
    valid for Python 2.6 and later (``except ... as``, ``with``).

    Args:
        url: Address of the first page to process.

    Returns:
        None.

    Raises:
        SystemExit: if the ``diggfoto`` output directory does not exist.
    """
    stop_suffixes = ('tags/', 'archive/', 'about/')
    visited = set()

    # Iterate rather than recurse once per page: a long gallery would
    # otherwise exhaust the interpreter's recursion limit.  The visited set
    # keeps a link cycle from looping forever.
    while url and url not in visited:
        visited.add(url)

        try:
            response = urllib2.urlopen(url)
            try:
                page = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(page)
            link = soup.find('div', 'entry').find('a')
            next_link = link['href']
            img = link.find('img')
            image_url = img['src']
            dot_index = image_url.rindex('.')
        except (AttributeError, IndexError, UnicodeDecodeError) as e:
            print(e)
            return
        except Exception as e:
            # Network errors, missing attributes, a URL without an extension.
            # KeyboardInterrupt is deliberately not caught here.
            print('Oops: %r' % (e,))
            return

        # Prefer the image's alt text as the file name; a missing alt
        # attribute is treated the same as an empty one.
        try:
            stem = img['alt']
        except KeyError:
            stem = ''
        if not stem:
            # Fall back to the last path segment of the page URL, with or
            # without a trailing slash.
            stem = url.rstrip('/').rsplit('/', 1)[-1]

        if not os.path.isdir('diggfoto'):
            print('Is there a directory named diggfoto?')
            sys.exit(0)

        # Keep the dot plus at most three characters of extension.
        name = os.path.join('diggfoto',
                            stem + image_url[dot_index:dot_index + 4])
        if not os.path.exists(name):
            try:
                print('Downloading Image ==> %s' % name)
            except UnicodeError:
                # The console cannot encode the file name.
                print('Downloading Image ...')
            try:
                urllib.urlretrieve(image_url, name)
            except Exception as e:
                # Best effort: skip this image and carry on with the next
                # page, exactly once.
                print('Download failed: %r' % (e,))

        if not next_link:
            return
        print('Next url ==> %s' % next_link)
        if next_link.endswith(stop_suffixes):
            print('Complete')
            return

        # Checkpoint the page just processed so an interrupted run can
        # restart from it.
        with open('diggfoto.dat', 'w') as url_file:
            url_file.write(url)
        url = next_link
if __name__ == "__main__":
    # Resume from the page recorded in diggfoto.dat by a previous run.
    try:
        with open('diggfoto.dat', 'r') as url_file:
            url = url_file.read().strip()
    except IOError:
        # No readable checkpoint yet.
        url = ''
    # A missing or empty checkpoint means starting from the front page.
    sys.exit(download(url or 'http://diggfoto.com'))
嗯,很棒!:)
已经弄好了,开始下载了~非常好的,谢谢你