使用 Python 对整个网页进行截图

作者在 2014-03-10 16:15:54 发布以下内容

方法一、使用PyQt4的QtWebKit组件

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# 来源 http://www.oschina.net/code/snippet_219811_14920

import sys
import os.path
from PyQt4 import QtGui,QtCore,QtWebKit

class PageShotter(QtGui.QWidget):
    """Load a URL in a QtWebKit view and save the rendered page as an image.

    Construct with the page URL and the output image filename, call
    ``shot()`` and then run the Qt event loop.  When the load attempt
    finishes, ``save_page`` writes the image (on success) and closes the
    widget.
    """

    def __init__(self, url, filename, parent=None):
        """Store the target ``url`` and output ``filename``.

        ``parent`` is forwarded to ``QWidget.__init__``.
        """
        QtGui.QWidget.__init__(self, parent)
        self.url = url
        self.filename = filename
        self.webpage = None

    def shot(self):
        """Start loading ``self.url``; ``save_page`` runs when loading ends."""
        webview = QtWebKit.QWebView(self)
        # Hook up the completion signal before starting the load so the
        # notification cannot be missed.
        self.connect(webview, QtCore.SIGNAL("loadFinished(bool)"), self.save_page)
        self.webpage = webview.page()
        webview.load(QtCore.QUrl(self.url))

    def save_page(self, finished):
        """Slot for ``loadFinished(bool)``: render the page and save it.

        ``finished`` is the success flag delivered by the signal.  The widget
        is closed afterwards in every case, including when rendering raises.
        """
        try:
            if not finished:
                print(u"网页加载失败!")
                return

            print(u"开始截图!")
            frame = self.webpage.mainFrame()
            size = frame.contentsSize()
            print(u"页面宽:%d,页面高:%d" % (size.width(), size.height()))
            # The viewport is made 16 px wider than the content; presumably
            # this leaves room for a vertical scrollbar -- TODO confirm.
            self.webpage.setViewportSize(
                QtCore.QSize(size.width() + 16, size.height()))

            img = QtGui.QImage(size, QtGui.QImage.Format_ARGB32)
            # A freshly constructed QImage holds uninitialised pixel data, so
            # give it a defined (opaque white) background before painting.
            img.fill(QtGui.qRgb(255, 255, 255))
            painter = QtGui.QPainter(img)
            frame.render(painter)
            painter.end()

            if img.save(self.filename):
                # A relative filename is written relative to the current
                # working directory, so report that location rather than the
                # directory containing this script.
                print(u"截图完毕:%s" % os.path.abspath(self.filename))
            else:
                print(u"截图失败")
        finally:
            self.close()

if __name__ == "__main__":
    # Script entry point: create the Qt application, start one screenshot job
    # and hand control to the event loop until it returns.
    # NOTE(review): the original kept a commented-out example pointing at
    # "http://www.adssfwewfdsfdsf.com" -- presumably a non-existent host used
    # to try the load-failure path.
    qt_app = QtGui.QApplication(sys.argv)
    page_shotter = PageShotter("http://www.youku.com/", 'shot.png')
    page_shotter.shot()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


方法二、使用selenium

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import time
from selenium import webdriver

# Open the page in Firefox, click the element with id "idClose", wait five
# seconds, then save a screenshot to shot.png.
browser = webdriver.Firefox()
try:
    browser.set_window_size(1055, 800)
    browser.get("http://www.yooli.com/")
    # Presumably "idClose" is the close button of a pop-up on this page --
    # verify against the live site.
    browser.find_element_by_id("idClose").click()
    # Fixed pause before capturing, as in the original script.
    time.sleep(5)

    browser.save_screenshot("shot.png")
finally:
    # Always shut the browser down, even if one of the steps above raised,
    # so no Firefox process is left running.
    browser.quit()

Python | 阅读 26831 次
文章评论,共0条
游客请输入验证码
浏览2896919次
文章归档
最新评论
  • 静夜思:-1是多核
  • 时光拾荒者:CtrlCV还是强的😝
  • 硬识岩丝:解决了,太感谢了~