on May 3rd, 2009 · diggfoto hack

diggfoto 上的图片还蛮不错的,但是要一张一张去点确实有点不人道(总在期望下一张更精彩?),下面的程序可以帮忙把它的图片都下回来,事先最好准备一个 diggfoto 文件夹,嘿嘿。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#! /usr/bin/env python
#coding=utf-8

import os
import sys
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup

def _read_entry(url):
    """Fetch *url* and pull the photo entry out of the page.

    Returns a ``(next_link, image_url, alt_text)`` tuple, where any element
    may be ``None`` when the corresponding attribute is absent, or ``None``
    when the page could not be fetched/parsed or has no photo entry.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            page = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        soup = BeautifulSoup(page)
    except Exception:
        # Best-effort boundary: any network or parse failure ends the crawl,
        # but Ctrl-C / SystemExit (not Exception subclasses) still propagate.
        print('Oops')
        return None

    entry = soup.find('div', 'entry')
    link = entry.find('a') if entry is not None else None
    img = link.find('img') if link is not None else None
    if img is None:
        print('No photo entry found')
        return None
    # .get() instead of [] so a missing attribute yields None, not KeyError.
    return link.get('href'), img.get('src'), img.get('alt')


def _image_filename(url, imageUrl, alt):
    """Build the local file name for the image found on page *url*.

    The stem is the image's alt text, or the last path segment of *url* when
    the alt text is empty or missing. The extension is the dot plus up to
    three characters after the last dot in *imageUrl* (empty if no dot).
    """
    stem = alt or url.rstrip('/').rsplit('/', 1)[-1]
    dot = imageUrl.rfind('.')
    ext = imageUrl[dot:dot + 4] if dot != -1 else ''
    # The name comes from remote page content: neutralise path separators so
    # it can never point outside the download directory.
    return (stem + ext).replace('/', '_').replace('\\', '_')


def download(url='http://diggfoto.com/'):
    """Walk the photo chain starting at *url*, saving each image locally.

    Each page is expected to contain ``<div class="entry">`` whose first
    ``<a>`` wraps the photo ``<img>`` and links to the next page. Images are
    stored in the ``diggfoto`` directory (created if missing); files that
    already exist are skipped, so an interrupted run can be restarted.

    The walk stops when a page cannot be read, when there is no next link,
    when the next link ends in ``tags/``, ``archive/`` or ``about/``, or when
    a page would be visited twice. Always returns ``None``.
    """
    target_dir = 'diggfoto'
    if not os.path.isdir(target_dir):
        try:
            os.makedirs(target_dir)
        except OSError:
            print('Is there a directory named diggfoto?')
            sys.exit(1)

    # Iterate instead of recursing once per page: a long chain of photos
    # would otherwise exceed the interpreter's recursion limit.
    visited = set()
    while url and url not in visited:
        visited.add(url)

        entry = _read_entry(url)
        if entry is None:
            return
        nextLink, imageUrl, alt = entry

        if imageUrl:
            name = os.path.join(target_dir,
                                _image_filename(url, imageUrl, alt))
            if not os.path.exists(name):
                try:
                    print('Downloading Image ==> %s' % name)
                except UnicodeError:
                    # The console may be unable to encode the file name.
                    print('Downloading Image ...')
                try:
                    urllib.urlretrieve(imageUrl, name)
                except Exception:
                    # Skip this image and carry on with the next page. Drop
                    # any partial file so a later run retries the download.
                    print('Download failed, skipping')
                    if os.path.exists(name):
                        os.remove(name)

        if not nextLink:
            return
        print('Next url ==> %s' % nextLink)
        if nextLink.endswith(('tags/', 'archive/', 'about/')):
            print('Complete')
            return
        url = nextLink
   
# Script entry point: crawl from the default start page, then exit with
# whatever download() returned as the process status.
if __name__ == '__main__':
    status = download()
    sys.exit(status)

12 Responses to “diggfoto hack”

  1. sky=yt on 25 Aug 2009 at 6:56 pm

    invalid syntax 在 AttributeError, e 这个逗号处
    我是菜鸟,完全不懂

  2. ant21 on 25 Aug 2009 at 8:53 pm

    有下载 BeautifulSoup 吗?
    地址在:http://www.crummy.com/software/BeautifulSoup/
    解压出来的 py 文件放到程序同一目录下。
    或者是复制代码的时候弄错了?我跑了一下,没问题。作了点小的改进,你再试试看。

  3. root on 27 Oct 2009 at 11:54 pm

    不懂什么代码也,怎么用法啊!!!

  4. root on 28 Oct 2009 at 11:03 am

    无法运行啊,大哥可不可以教一下小弟啊?

  5. root on 28 Oct 2009 at 11:52 am

    哎呀,自己解决了…谢谢,谢谢

  6. sky=yt on 04 Nov 2009 at 12:38 pm

    Hi~还是没有用成功,总是显示invalid syntax 在 AttributeError, e 我下载了beautiful soup,用windows下IDLE的run module运行,还是不行,估计是我的方法有错,请再指点一下吧。

  7. ant21 on 04 Nov 2009 at 3:45 pm

    复制下代码比如保存成 diggfoto.py 文件,随便在哪儿,和 beautifulsoup 放一块儿就行,建一个叫做 diggfoto 的文件夹用来放下载的照片,然后在终端敲入 python diggfoto.py 运行。

  8. root on 07 Nov 2009 at 2:22 pm

    ActivePython-2.6.3.7-win32-x86.msi

    我装了这个就OK啦,

  9. narkissos on 23 Dec 2009 at 8:25 pm

    哥们,good job
    我在你code上做了点改进,这样就可以‘断点续传’了:)

    #! /usr/bin/env python
    #coding=utf-8

    import os
    import sys
    import urllib
    import urllib2
    from BeautifulSoup import BeautifulSoup

    def download(url):
    try:
    r = urllib2.urlopen(url)
    page = r.read()

    soup = BeautifulSoup(page)
    entry = soup.find(‘div’, ‘entry’)
    a = entry.find(‘a’)
    nextLink = a['href']
    img = a.find(‘img’)
    imageUrl = img['src']
    dotindex = imageUrl.rindex(‘.’)
    except AttributeError, e:
    print e
    except IndexError, e:
    print e
    except UnicodeDecodeError, e:
    print e
    except:
    print ‘Oops’
    else:
    if len(img['alt']) == 0:
    slash = url.rfind(‘/’, 0, len(url)-2)
    rand = url[slash+1:url.rfind('/')]
    else:
    rand = img['alt']

    if not os.path.exists(‘diggfoto’):
    print ‘Is there a directory named diggfoto?’
    sys.exit(0)

    name = os.path.join(‘diggfoto’, rand + imageUrl[dotindex:dotindex+4])
    exist = os.path.exists(name)

    if imageUrl and not exist:
    try:
    print ‘Downloading Image ==> %s’ % name
    except:
    print ‘Downloading Image …’
    try:
    urllib.urlretrieve(imageUrl, name)
    except:
    download(nextLink)

    if nextLink:
    print ‘Next url ==> %s’ % nextLink
    if nextLink.endswith(‘tags/’) or nextLink.endswith(‘archive/’) \
    or nextLink.endswith(‘about/’):
    print ‘Complete’
    else:
    url_file = open(‘diggfoto.dat’, ‘w’)
    url_file.write(url);
    url_file.close( )
    download(nextLink)

    if __name__ == “__main__”:
    try:
    url_file = open(‘diggfoto.dat’, ‘r’)
    url = url_file.read( )
    except:
    url = ‘http://diggfoto.com’
    else:
    url_file.close( )

    sys.exit(download(url))

  10. narkissos on 23 Dec 2009 at 8:30 pm

    sorry,格式有点乱

    #! /usr/bin/env python
    #coding=utf-8

    import os
    import sys
    import urllib
    import urllib2
    from BeautifulSoup import BeautifulSoup

    def download(url):  
        try:
            r = urllib2.urlopen(url)
            page = r.read()
            
            soup = BeautifulSoup(page)
            entry = soup.find(‘div’, ’entry’)
            a = entry.find(‘a’)
            nextLink = a['href']
            img = a.find(‘img’)
            imageUrl = img['src']
            dotindex = imageUrl.rindex(‘.’)        
        except AttributeError, e:
            print e
        except IndexError, e:
            print e
        except UnicodeDecodeError, e:
            print e
        except:
            print ’Oops’
        else:        
            if len(img['alt']) == 0:
                slash = url.rfind(‘/’, 0, len(url)-2)
                rand = url[slash+1:url.rfind('/')]
            else:
                rand = img['alt']
            
            if not os.path.exists(‘diggfoto’):
                print ’Is there a directory named diggfoto?’
                sys.exit(0)
            
            name = os.path.join(‘diggfoto’, rand + imageUrl[dotindex:dotindex+4])
            exist = os.path.exists(name)

            if imageUrl and not exist:
                try:
                    print ’Downloading Image ==> %s’ % name
                except:
                    print ’Downloading Image …’
                try:
                    urllib.urlretrieve(imageUrl, name)
                except:
                    download(nextLink)
            
            if nextLink:
                print ’Next url ==> %s’ % nextLink
                if nextLink.endswith(‘tags/’) or nextLink.endswith(‘archive/’) \
                    or nextLink.endswith(‘about/’):
                    print ’Complete’
                else:
                    url_file = open(‘diggfoto.dat’, ’w')
                    url_file.write(url);
                    url_file.close( )
                    download(nextLink)
        
    if __name__ == ”__main__”:
      try:
        url_file = open(‘diggfoto.dat’, ’r')
        url = url_file.read( )
      except:
        url = ’http://diggfoto.com’
      else:
        url_file.close( )
        
      sys.exit(download(url))

  11. ant21 on 23 Dec 2009 at 11:23 pm

    嗯,很棒!:)

  12. Sky=yt on 30 Jan 2010 at 11:38 pm

    已经弄好了,开始下载了~非常好的,谢谢你

Trackback URI | Comments RSS

Leave a Reply