author | Michael Stahl <mstahl@redhat.com> | 2015-06-03 12:14:31 +0200
---|---|---
committer | Michael Stahl <mstahl@redhat.com> | 2015-06-04 21:51:25 +0200
commit | 0cfe2c8c893bfe6d1c2dce5941065eb4e841e7cc (patch) |
tree | 7ab6e97e474653679d4ed5c7dc92f366826a8217 | /bin/get-bugzilla-attachments-by-mimetype
parent | update to liborcus 0.9.1 (diff) |
download | core-0cfe2c8c893bfe6d1c2dce5941065eb4e841e7cc.tar.gz core-0cfe2c8c893bfe6d1c2dce5941065eb4e841e7cc.zip |
get-bugzilla-attachments: fix the multi-threading
The queue was limited to an arbitrary maximum size, causing half of the
jobs to be dropped on the floor. Also it didn't run on Python 3.
Change-Id: I90bfba448291d901c5a7c83389d17c6acdd919c8
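To make the failure mode concrete: a minimal sketch (hypothetical numbers and worker, not the script itself) of how the old pattern, a bounded `Queue.Queue(40)` combined with `put(..., block=True, timeout=3)`, silently drops work whenever the workers cannot drain the queue fast enough:

```python
import queue      # "Queue" on Python 2
import threading
import time

jobs = queue.Queue(2)            # artificially small bound, like the old Queue.Queue(40)

def slow_worker():
    while True:
        jobs.get()
        time.sleep(0.1)          # simulate a slow bugzilla query
        jobs.task_done()

threading.Thread(target=slow_worker, daemon=True).start()

dropped = 0
for i in range(10):
    try:
        # old pattern: bounded queue + timeout; raises queue.Full under load
        jobs.put(i, block=True, timeout=0.05)
    except queue.Full:
        dropped += 1             # the old code just printed "Queue full" and moved on
jobs.join()
print("dropped %d of 10 jobs" % dropped)   # typically drops several
```

With an unbounded queue, `put()` always succeeds and `join()` still provides the completion barrier, which is exactly the trade the patch makes.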
Diffstat (limited to 'bin/get-bugzilla-attachments-by-mimetype')
-rwxr-xr-x | bin/get-bugzilla-attachments-by-mimetype | 34 |
1 file changed, 17 insertions, 17 deletions
```diff
diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 9ae182cf5196..7e6dc83ec8be 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -31,7 +31,11 @@ import re
 import os, os.path
 import stat
 import sys
-import threading, Queue
+import threading
+try:
+    import queue
+except:
+    import Queue as queue
 try:
     from urllib.request import urlopen
 except:
@@ -206,7 +210,7 @@ def get_through_rss_query(queryurl, mimetype, prefix, suffix, startid):
     get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml
 
     def process(query, full, have=[]):
-        url = queryurl + '?' + '&'.join(['='.join(kv) for kv in query.iteritems()])
+        url = queryurl + '?' + '&'.join(['='.join(kv) for kv in query.items()])
         print('url is ' + url)
         d = feedparser.parse(url)
         print(str(len(d['entries'])) + ' bugs to process')
@@ -493,44 +497,40 @@ class manage_threads(threading.Thread):
                 # Get job from queue
                 # Use job parameters to call our query
                 # Then let the queue know we are done with this job
-                job = jobs.get(True,6)
-                get_through_rss_query(job[0], job[1], job[2], job[3], job[4]) # [0] = uri; [1] = mimetype; [2] = prefix; [3] = extension; [4] = startid
-                jobs.task_done()
+                (uri, mimetype, prefix, extension, startid) = jobs.get(True,6)
+                try:
+                    get_through_rss_query(uri, mimetype, prefix, extension, startid)
+                finally:
+                    jobs.task_done()
             except KeyboardInterrupt:
                 raise # Ctrl+C should work
-            except:
+            except queue.Empty:
                 break
 
 def generate_multi_threading():
     for (prefix, uri, startid) in rss_bugzillas:
 
         # Initialize threads
-        for i in xrange(max_threads):
+        for i in range(max_threads):
             manage_threads().start()
 
         # Create a job for every mimetype for a bugzilla
         for (mimetype,extension) in mimetypes.items():
-
-            # It seems that bugzilla has problems returing that many results
             # (10000 results is probably a limit set somewhere) so we always
             # end processing the complete list.
             if mimetype == 'text/html' and prefix == 'moz':
                 continue
-            try:
-                jobs.put([uri, mimetype, prefix, extension, startid], block=True, timeout=3)
-                print("successfully placed a job in the queue searching for " + mimetype + " in bugtracker " + prefix)
-            except KeyboardInterrupt:
-                raise # Ctrl+C should work
-            except:
-                print("Queue full")
+            jobs.put([uri, mimetype, prefix, extension, startid], block=True)
+            print("successfully placed a job in the queue searching for " + mimetype + " in bugtracker " + prefix)
 
         # Continue when all mimetypes are done for a bugzilla
         jobs.join()
+        print("DONE with bugtracker " + prefix)
 
 max_threads = 20 # Number of threads to create, (1 = without multi-threading)
-jobs = Queue.Queue(40)
+jobs = queue.Queue()
 
 generate_multi_threading()
```
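For reference, a minimal standalone sketch of the worker pattern the patch converges on; `fetch` and `Worker` are hypothetical stand-ins for `get_through_rss_query` and `manage_threads`:

```python
import queue
import threading

jobs = queue.Queue()   # unbounded: put() can no longer raise queue.Full

def fetch(uri, mimetype, prefix, extension, startid):
    print("querying %s for %s" % (uri, mimetype))   # stand-in for the real RSS query

class Worker(threading.Thread):
    def run(self):
        while True:
            try:
                # unpack the job into named fields instead of job[0]..job[4]
                (uri, mimetype, prefix, extension, startid) = jobs.get(True, 6)
                try:
                    fetch(uri, mimetype, prefix, extension, startid)
                finally:
                    jobs.task_done()   # runs even if fetch() raises
            except KeyboardInterrupt:
                raise                  # Ctrl+C should work
            except queue.Empty:
                break                  # no job within 6 seconds: let the thread exit

for i in range(4):
    Worker().start()
jobs.put(("https://bugs.example.org/buglist.cgi", "application/msword", "ex", ".doc", 0))
jobs.join()   # returns once every queued job has been marked done
# idle workers exit on their own after the 6-second get() timeout
```

The `try`/`finally` around `task_done()` matters: without it, one failed query would leave the queue's unfinished-task count permanently non-zero and `jobs.join()` would block forever. Catching `queue.Empty` instead of a bare `except:` also stops real errors from being silently swallowed as a thread exit.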