Some tricks for using a PyQt application as a frontend for Scrapy.
Task: I want to build an app with a button that starts Scrapy parsing in a thread and logs all messages to a single QTextEdit-based widget.
MainWidget.py
class ActionWindow(widgets.BaseAction, ui.submit_fun.Ui_Form):
    """Window whose "fetch URLs" button starts a ScrapThread logging to txt_Logger."""

    def __init__(self, parent=None):
        super(ActionWindow, self).__init__(parent)
        self.setupUi(self)
        self.setWindowTitle("Scrapy Fun")
        # Holds the most recently started ScrapThread (see on_but_FetchUrls).
        self._scrapthread = None
        self.connect(self.but_FetchUrls, SIGNAL("clicked()"), self.on_but_FetchUrls)
        # ... (remaining setup elided in the original snippet)

    def on_but_FetchUrls(self):
        """Slot for the button's clicked() signal: start the scraping thread."""
        # self.txt_Logger - logging widget
        # Keep the thread on the instance: a QThread referenced only by a local
        # variable can be garbage-collected while it is still running.
        self._scrapthread = ScrapThread(self.txt_Logger)
        self._scrapthread.start()
self.txt_Logger is a QTextEdit-based widget with an appendText method. We export it directly from the UI Designer.
widgets.py
...
class LogWidget(QtGui.QTextEdit):
    """QTextEdit subclass that can append plain text at the end of its content."""

    def __init__(self, parent=None):
        super(LogWidget, self).__init__(parent)

    def appendText(self, txt):
        """
        Public method to insert text after the current end of the document.

        @param txt text to insert (QString)
        """
        end_cursor = self.textCursor()
        end_cursor.movePosition(QTextCursor.End)
        # Install the moved cursor so the insertion happens at the end.
        self.setTextCursor(end_cursor)
        self.insertPlainText(txt)
        self.ensureCursorVisible()
...
The main goal of scrapthread.start is to correctly execute Scrapy commands, for example:
from scrapy.command.cmdline import execute

# The list plays the role of the command line: program name, command, spider.
execute(['start.py', 'crawl', 'gfl_info'])
This is the equivalent of
./scrapy-ctl.py crawl gfl_info
To do that we need to set the environment variable SCRAPY_SETTINGS_MODULE to the name of the Scrapy settings module and, of course, make our Scrapy project visible to the Qt application:
import sys
import os
# Trick 1: SCRAPY_SETTINGS_MODULE has to be set in order to use
# scrapy.command.cmdline. A value that is already present is left untouched.
if 'SCRAPY_SETTINGS_MODULE' not in os.environ:
    os.environ['SCRAPY_SETTINGS_MODULE'] = 'gfl_info.settings'
# Make the Scrapy project importable from the Qt application.
sys.path.append("../scrapyprojects/gfl_info")
We start Scrapy in a new thread, which is why OS signal handlers don't work. When we start our script now, we get something like:
Traceback (most recent call last):
File "/home/user/MProg/gamecollector/qtend/actions/MainWidget.py", line 78, in run
execute(['start.py','crawl','gfl_info','gfl2_info'])
File "/usr/lib/python2.5/site-packages/scrapy-0.7-py2.5.egg/scrapy/command/cmdline.py", line 132, in execute
scrapymanager.configure(control_reactor=True)
File "/usr/lib/python2.5/site-packages/scrapy-0.7-py2.5.egg/scrapy/core/manager.py", line 66, in configure
install_shutdown_handlers(self._signal_shutdown)
File "/usr/lib/python2.5/site-packages/scrapy-0.7-py2.5.egg/scrapy/utils/ossignal.py", line 21, in install_shutdown_handlers
reactor._handleSignals()
File "/usr/lib/python2.5/site-packages/Twisted-8.2.0-py2.5-linux-i686.egg/twisted/internet/base.py", line 1068, in _handleSignals
signal.signal(signal.SIGINT, self.sigInt)
ValueError: signal only works in main thread
And that is absolutely fair. Let's go to scrapy/core/manager.py and try to understand what install_shutdown_handlers does. Nothing special :) - it only handles shutdown signals (such as SIGINT, SIGTERM). It's a good feature, but threads are cooler.
In our startup section (where os.environ.setdefault was), let's write:
# Trick 1.1: neutralise
# scrapy.utils.ossignal.install_shutdown_handlers(func, override_sigint=True).
# Signal handlers do not work in a sub-thread ("signal only works in main
# thread"), so the call is replaced with a no-op.
# NOTE(review): scrapy/core/manager.py calls install_shutdown_handlers by its
# bare name, so this patch presumably has to run before that module is
# imported - confirm.
import scrapy.utils.ossignal
# Accept any arguments so calls passing override_sigint keep working.
scrapy.utils.ossignal.install_shutdown_handlers = lambda *args, **kwargs: None
Now the errors have disappeared.
And a little bit more code:
import twisted.python.log
import scrapy.log
class WidgetLoggingHandler(logging.Handler):
    """logging.Handler that saves the log in a widget supporting appendText.

    emit() does not touch the widget directly: it sends the formatted text
    through the "LOGOUTPUT" signal of ``thread``, and showText() (connected to
    that signal in __init__) appends it to the widget.
    """

    def __init__(self, widget, thread):
        """
        @param widget object providing connect() and appendText(txt)
        @param thread object whose emit() sends the "LOGOUTPUT" signal
        """
        logging.Handler.__init__(self)
        self.widget = widget
        self.thread = thread
        self.widget.connect(self.thread, SIGNAL("LOGOUTPUT"), self.showText)

    def showText(self, txt):
        """Append ``txt`` followed by a newline to the widget."""
        self.widget.appendText(txt + "\n")

    def emit(self, record):
        """Format ``record`` and send the text through the thread's signal."""
        try:
            txt = self.format(record)
            self.thread.emit(SIGNAL("LOGOUTPUT"), txt)
        except Exception:
            # A failing handler must not raise into the code that logged;
            # hand the problem to logging's own error hook instead.
            self.handleError(record)
class ScrapThread(QThread):
    """QThread that runs a Scrapy crawl and can route its log to a widget."""

    def __init__(self, logwidget=None, *argv):
        """
        @param logwidget optional widget supporting appendText; when given, the
                         "frontend" and "twisted" loggers are redirected to it
        @param argv extra positional arguments passed on to QThread
        """
        QThread.__init__(self, *argv)
        self.logwidget = logwidget
        if logwidget is not None:
            ch = WidgetLoggingHandler(logwidget, self)
            ch.setLevel(logging.DEBUG)
            # create formatter
            formatter = logging.Formatter(
                "%(asctime)s %(levelname)s %(name)s::%(funcName)s - %(message)s")
            # add formatter to ch
            ch.setFormatter(formatter)
            self.clean_attach_logging_handler("frontend", ch)
            self.clean_attach_logging_handler("twisted", ch)

    def clean_attach_logging_handler(self, loggername, ch):
        """Make ``ch`` the only handler of the logger named ``loggername``.

        @param loggername name passed to logging.getLogger
        @param ch handler to attach after the existing ones are removed
        """
        lg = logging.getLogger(loggername)
        # Iterate over a copy: removing entries from lg.handlers while looping
        # over that same list skips every other handler.
        for hdlr in list(lg.handlers):
            lg.removeHandler(hdlr)
        lg.addHandler(ch)

    def run(self):
        """Thread body: set up Twisted/Scrapy logging, then run the crawl."""
        import traceback

        # Trick 2: if the default observer still exists, no observers from
        # Scrapy are installed and log.startLogging was not called.
        if twisted.python.log.defaultObserver:
            observer = twisted.python.log.PythonLoggingObserver()
            observer.start()
            twisted.python.log.defaultObserver = None
        # NOTE(review): the original snippet lost its indentation; the two
        # scrapy.log calls are assumed to sit outside the "if" above - confirm.
        scrapy.log.start()
        scrapy.log.msg("Scrapy Initialised", level=scrapy.log.INFO)
        from scrapy.command.cmdline import execute
        try:
            execute(['start.py', 'crawl', 'digg.com'])
        except Exception:
            # Report the failure both on stderr and through the "frontend"
            # logger, which __init__ attaches to the log widget when given.
            traceback.print_exc()
            logging.getLogger("frontend").critical(traceback.format_exc())
