Skip to content

Commit 70163d2

Browse files
committed
concurrency examples
1 parent ab6ce5b commit 70163d2

File tree

13 files changed

+131
-56
lines changed

13 files changed

+131
-56
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
concurrency/flags/img/*.gif
2-
concurrency/charfinder_index.pickle
2+
concurrency/charfinder/charfinder_index.pickle
33
metaprog/oscon-schedule/data/schedule?_db
4+
concurrency/wikipedia/fixture/docroot/
45

56
# Byte-compiled / optimized / DLL files
67
__pycache__/

concurrency/charfinder/charfinder.html

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,21 @@
77
//(function() {
88
var BASE_URL = 'http://127.0.0.1:8888/chars';
99
var RESULTS_PER_REQUEST = 10;
10-
var REQUEST_DELAY = 1000; // in milliseconds
10+
var REQUEST_DELAY = 100; // in milliseconds
1111
var httpRequest = new XMLHttpRequest();
1212
httpRequest.onreadystatechange = processResponse;
1313

1414
function requestMaker(start) {
1515
var makeRequest = function (event) {
1616
var query = document.getElementById('queryField').value;
17-
var limit = RESULTS_PER_REQUEST;
18-
httpRequest.open('GET', BASE_URL+'?query='+query+'&limit='+limit);
17+
var stop = start + RESULTS_PER_REQUEST;
18+
var params = '?query='+query+'&start='+start+'&stop='+stop;
19+
httpRequest.open('GET', BASE_URL+params);
1920
httpRequest.send();
2021
document.getElementById('message').textContent = 'Query: ' + query;
2122
var table = document.getElementById('results');
2223
var tr;
23-
while (tr = table.lastChild) table.removeChild(tr);
24+
if (start == 0) while (tr = table.lastChild) table.removeChild(tr);
2425
return false; // don't submit form
2526
}
2627
return makeRequest;
@@ -104,7 +105,7 @@
104105
var table = document.getElementById('results');
105106
var tr;
106107
var characters = getSymbols(results.chars);
107-
for (var i=results.start; i < results.stop; i++) {
108+
for (var i=0; i < characters.length; i++) {
108109
ch = characters[i];
109110
if (ch == '\n') continue;
110111
if (ch == '\x00') break;
@@ -116,9 +117,10 @@
116117
tr.cells[1].appendChild(document.createTextNode(ch));
117118
tr.id = hexCode;
118119
table.appendChild(tr);
119-
if (results.stop < results.total) {
120-
setTimeout(requestMaker(results.stop)(), REQUEST_DELAY);
121-
}
120+
}
121+
// setTimeout(getDescriptions, REQUEST_DELAY/2)
122+
if (results.stop < results.total) {
123+
setTimeout(requestMaker(results.stop), REQUEST_DELAY);
122124
}
123125
}
124126
window.onload = function() {

concurrency/charfinder/charfinder.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def query_type(text):
9898

9999
CharDescription = namedtuple('CharDescription', 'code_str char name')
100100

101+
QueryResult = namedtuple('QueryResult', 'len items')
102+
101103
class UnicodeNameIndex:
102104

103105
def __init__(self, chars=None):
@@ -169,12 +171,14 @@ def find_chars(self, query, start=0, stop=None):
169171
if result_sets:
170172
result = result_sets[0].intersection(*result_sets[1:])
171173
result = sorted(result) # must sort for consistency
172-
for char in itertools.islice(result, start, stop):
173-
yield char
174+
result_iter = itertools.islice(result, start, stop)
175+
return QueryResult(len(result),
176+
(char for char in result_iter))
177+
return QueryResult(0, ())
174178

175179
def find_codes(self, query, start=0, stop=None):
176180
return (ord(char) for char
177-
in self.find_chars(query, start, stop))
181+
in self.find_chars(query, start, stop).items)
178182

179183
def describe(self, char):
180184
code_str = 'U+{:04X}'.format(ord(char))
@@ -185,6 +189,10 @@ def find_descriptions(self, query, start=0, stop=None):
185189
for char in self.find_chars(query, start, stop):
186190
yield self.describe(char)
187191

192+
def get_descriptions(self, chars):
193+
for char in chars:
194+
yield self.describe(char)
195+
188196
def describe_str(self, char):
189197
return '{:7}\t{}\t{}'.format(*self.describe(char))
190198

-3.29 MB
Binary file not shown.

concurrency/charfinder/http_charfinder2.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,30 +75,26 @@ def form(request):
7575
@asyncio.coroutine
7676
def get_chars(request):
7777
peername = request.transport.get_extra_info('peername')
78-
query = request.GET.get('query', '')
79-
limit = request.GET.get('query', 0)
8078
print('Request from: {}, GET data: {!r}'.format(peername, dict(request.GET)))
79+
query = request.GET.get('query', '')
8180
if query:
8281
try:
8382
start = int(request.GET.get('start', 0))
8483
stop = int(request.GET.get('stop', sys.maxsize))
8584
except ValueError:
8685
raise web.HTTPBadRequest()
8786
stop = min(stop, start+RESULTS_PER_REQUEST)
88-
chars = list(index.find_chars(query, start, stop))
87+
num_results, chars = index.find_chars(query, start, stop)
8988
else:
90-
chars = []
91-
start = 0
92-
stop = 0
93-
num_results = len(chars)
89+
raise web.HTTPBadRequest()
9490
text = ''.join(char if n % 64 else char+'\n'
9591
for n, char in enumerate(chars, 1))
9692
response_data = {'total': num_results, 'start': start, 'stop': stop}
9793
print('Response to query: {query!r}, start: {start}, stop: {stop}'.format(
9894
query=query, **response_data))
9995
response_data['chars'] = text
10096
json_obj = json.dumps(response_data)
101-
print('Sending {} results'.format(num_results))
97+
print('Sending {} characters'.format(len(text)))
10298
headers = {'Access-Control-Allow-Origin': '*'}
10399
return web.Response(content_type=TEXT_TYPE, headers=headers, text=json_obj)
104100

concurrency/charfinder/test_charfinder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ def test_find_word_1_match(sample_index):
4343

4444

4545
def test_find_word_1_match_character_result(sample_index):
46-
res = [name(char) for char in sample_index.find_chars('currency')]
46+
res = [name(char) for char in
47+
sample_index.find_chars('currency').items]
4748
assert res == ['EURO-CURRENCY SIGN']
4849

4950

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import sys
2+
import argparse
3+
import os
4+
5+
from daypicts import get_picture_url, validate_date, gen_dates
6+
from daypicts import NoPictureForDate
7+
from daypicts import POTD_PATH
8+
9+
FIXTURE_DIR = 'fixture/'
10+
11+
12+
def parse_args(argv):
    """Parse the command line and expand the requested date into a date list.

    ``argv`` is the list of command-line arguments (without the program
    name).  The single positional ``date`` argument is checked with
    ``validate_date`` and expanded with ``gen_dates``.

    Returns a ``(dates, args)`` tuple: the list of expanded dates and the
    ``argparse`` namespace.  When ``validate_date`` raises ``ValueError`` the
    message and the usage line are printed and the process exits with
    status 2.
    """
    # The description shown by --help is the docstring of main().
    arg_parser = argparse.ArgumentParser(description=main.__doc__)
    arg_parser.add_argument(
        'date',
        help='YYYY-MM-DD or YYYY-MM or YYYY: year, month and day',
    )
    args = arg_parser.parse_args(argv)

    try:
        iso_parts = validate_date(args.date)
    except ValueError as exc:
        print('error:', exc.args[0])
        arg_parser.print_usage()
        sys.exit(2)

    dates = list(gen_dates(iso_parts))

    # Summarize what will be processed: a range, or the one requested date.
    if len(dates) != 1:
        print('-> {} days: {}...{}'.format(len(dates), dates[0], dates[-1]))
    else:
        print('-> Date: ', dates[0])

    return dates, args
34+
35+
36+
def save_picture_urls(dates, save_path):
    """Write one snippet file per date into ``save_path``.

    For each date, ``get_picture_url`` is called.  On success the URL is
    rewritten into a ``src="//..."`` attribute snippet; when
    ``NoPictureForDate`` is raised, the ``repr`` of the exception is used as
    the snippet instead.  The date and snippet are echoed to stdout, and the
    snippet is written to a file named after the date inside ``save_path``.
    """
    for iso_date in dates:
        try:
            picture_url = get_picture_url(iso_date)
            # Turn 'http://host/...' into 'src="//host/..."'.
            snippet = picture_url.replace('http://', 'src="//') + '"'
        except NoPictureForDate as exc:
            snippet = repr(exc)

        print(iso_date, snippet)

        target = os.path.join(save_path, iso_date)
        with open(target, 'w') as out:
            out.write(snippet)
48+
49+
50+
def main(argv):
    """Build test fixture from Wikipedia "POTD" data"""
    # NOTE: the docstring above is also used as the argparse description
    # (parse_args reads main.__doc__), so its text is runtime-visible.
    #
    # argv: command-line arguments without the program name.
    #
    # Validate the command line first: parse_args exits with status 2 on an
    # invalid date, and in that case no fixture directory should be created.
    dates, _args = parse_args(argv)

    # Create the fixture directory tree; an existing directory is fine.
    save_path = os.path.join(FIXTURE_DIR, POTD_PATH)
    os.makedirs(save_path, exist_ok=True)

    save_picture_urls(dates, save_path)
62+
63+
if __name__ == '__main__':
64+
main(sys.argv[1:])

concurrency/wikipedia/daypicts.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
import requests
2626

2727
SAVE_DIR = 'pictures/'
28-
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
28+
POTD_PATH = 'Template:POTD/'
29+
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/' + POTD_PATH
2930
POTD_IMAGE_RE = re.compile(r'src="(//upload\..*?)"')
3031
PODT_EARLIEST_TEMPLATE = '2007-01-01'
3132

@@ -84,7 +85,7 @@ def validate_date(text):
8485
test_parts = parts[:]
8586
while len(test_parts) < 3:
8687
test_parts.append(1)
87-
date = datetime.datetime(*(int(part) for part in test_parts))
88+
date = datetime.date(*(int(part) for part in test_parts))
8889
iso_date = date.strftime(ISO_DATE_FMT)
8990
iso_date = iso_date[:1+len(parts)*3]
9091
if iso_date < PODT_EARLIEST_TEMPLATE:
@@ -95,7 +96,7 @@ def validate_date(text):
9596
def gen_month_dates(iso_month):
9697
first = datetime.datetime.strptime(iso_month+'-01', ISO_DATE_FMT)
9798
one_day = datetime.timedelta(days=1)
98-
date = first
99+
date = first.date()
99100
while date.month == first.month:
100101
yield date.strftime(ISO_DATE_FMT)
101102
date += one_day
@@ -115,6 +116,26 @@ def gen_dates(iso_parts):
115116
yield iso_parts
116117

117118

119+
def get_picture_urls(dates, verbose=False, save_fixture=False):
    """Collect the picture URL for every date that has one.

    Calls ``get_picture_url`` for each item of ``dates`` in order.  Dates for
    which it raises ``NoPictureForDate`` are skipped (and reported only when
    ``verbose`` is true).  Each URL found is printed: in verbose mode as a
    running count followed by the last path component, otherwise as the full
    URL.

    ``save_fixture`` is accepted but not used by this implementation.

    Returns the list of URLs found, in the order of ``dates``.
    """
    found = []
    for iso_date in dates:
        try:
            picture_url = get_picture_url(iso_date)
        except NoPictureForDate as exc:
            if verbose:
                print('*** {!r} ***'.format(exc))
            continue

        if not verbose:
            print(picture_url)
        else:
            # Running count of successful lookups, then just the file name.
            print('{:3d}'.format(len(found) + 1), end=' ')
            print(picture_url.rsplit('/', 1)[-1])

        found.append(picture_url)
    return found
137+
138+
118139
def parse_args(argv):
119140
parser = argparse.ArgumentParser(description=main.__doc__)
120141
date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
@@ -123,6 +144,8 @@ def parse_args(argv):
123144
help='maximum number of items to fetch')
124145
parser.add_argument('-u', '--url_only', action='store_true',
125146
help='get picture URLS only')
147+
parser.add_argument('-f', '--fixture_save', action='store_true',
148+
help='save data for local test fixture')
126149
parser.add_argument('-v', '--verbose', action='store_true',
127150
help='display progress information')
128151
args = parser.parse_args(argv)
@@ -145,34 +168,14 @@ def parse_args(argv):
145168
return dates, args
146169

147170

148-
def get_picture_urls(dates, verbose=False):
149-
urls = []
150-
count = 0
151-
for date in dates:
152-
try:
153-
url = get_picture_url(date)
154-
except NoPictureForDate as exc:
155-
if verbose:
156-
print('*** {!r} ***'.format(exc))
157-
continue
158-
count += 1
159-
if verbose:
160-
print(format(count, '3d'), end=' ')
161-
print(url.split('/')[-1])
162-
else:
163-
print(url)
164-
urls.append(url)
165-
return urls
166-
167-
168171
def main(argv, get_picture_urls):
169172
"""Get Wikipedia "Picture of The Day" for date, month or year"""
170173

171174
dates, args = parse_args(argv)
172175

173176
t0 = time.time()
174177

175-
urls = get_picture_urls(dates, args.verbose)
178+
urls = get_picture_urls(dates, args.verbose, args.fixture_save)
176179

177180
elapsed = time.time() - t0
178181
if args.verbose:

concurrency/wikipedia/daypicts_asyncio.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@
66
import asyncio
77
import aiohttp
88

9-
from daypicts import main
10-
from daypicts import NoPictureForDate
11-
from daypicts import POTD_BASE_URL
12-
from daypicts import POTD_IMAGE_RE
9+
from daypicts import main, NoPictureForDate
10+
from daypicts import POTD_BASE_URL, POTD_IMAGE_RE
1311

1412
GLOBAL_TIMEOUT = 300 # seconds
13+
MAX_CONCURRENT_REQUESTS = 30
1514

1615

1716
@asyncio.coroutine
18-
def get_picture_url(iso_date):
17+
def get_picture_url(iso_date, semaphore):
1918
page_url = POTD_BASE_URL+iso_date
20-
response = yield from aiohttp.request('GET', page_url)
21-
text = yield from response.text()
19+
with (yield from semaphore):
20+
response = yield from aiohttp.request('GET', page_url)
21+
text = yield from response.text()
2222
pict_url = POTD_IMAGE_RE.search(text)
2323
if pict_url is None:
2424
raise NoPictureForDate(iso_date)
@@ -27,7 +27,8 @@ def get_picture_url(iso_date):
2727

2828
@asyncio.coroutine
2929
def get_picture_urls(dates, verbose=False):
30-
tasks = [get_picture_url(date) for date in dates]
30+
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
31+
tasks = [get_picture_url(date, semaphore) for date in dates]
3132
urls = []
3233
count = 0
3334
# get results as jobs are done

concurrency/wikipedia/daypicts_threads.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77

88
from daypicts import main, get_picture_url, NoPictureForDate
99

10-
MAX_NUM_THREADS = 400
1110
GLOBAL_TIMEOUT = 300 # seconds
11+
MAX_CONCURRENT_REQUESTS = 30
1212

1313

1414
def get_picture_urls(dates, verbose=False):
15-
num_threads = min(len(dates), MAX_NUM_THREADS)
16-
pool = futures.ThreadPoolExecutor(num_threads)
15+
pool = futures.ThreadPoolExecutor(MAX_CONCURRENT_REQUESTS)
1716

1817
pending = {}
1918
for date in dates:

0 commit comments

Comments
 (0)