55import urllib .robotparser
66from test import support
77from test .support import socket_helper
8- from test .support import threading_helper
98from http .server import BaseHTTPRequestHandler , HTTPServer
109
1110
@@ -384,26 +383,25 @@ def test_string_formatting(self):
384383)
class BaseLocalNetworkTestCase:
    """Shared fixture: runs a local HTTP server (one per test class).

    Subclasses must define a ``RobotHandler`` request-handler class
    attribute; the server is started once in ``setUpClass`` and torn
    down via class-level cleanups.
    """

    @classmethod
    def setUpClass(cls):
        # Clear urllib.request's _opener global variable after the class
        # finishes, so opener state does not leak into other test classes.
        cls.addClassCleanup(urllib.request.urlcleanup)

        # Port 0 lets the OS pick a free port; subclasses supply RobotHandler.
        cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler)
        cls.addClassCleanup(cls.server.server_close)

        t = threading.Thread(
            name='HTTPServer serving',
            target=cls.server.serve_forever,
            # Short poll interval to make the test finish quickly.
            # Time between requests is short enough that we won't wake
            # up spuriously too many times.
            kwargs={'poll_interval': 0.01})
        t.daemon = True  # In case this function raises.
        t.start()
        # Class cleanups run LIFO: shutdown() stops serve_forever first,
        # then join() reaps the thread, then server_close()/urlcleanup()
        # (registered above) run last.
        cls.addClassCleanup(t.join)
        cls.addClassCleanup(cls.server.shutdown)
408406
409407SAMPLE_ROBOTS_TXT = b'''\
@@ -425,7 +423,6 @@ def do_GET(self):
    def log_message(self, format, *args):
        # Suppress BaseHTTPRequestHandler's default per-request stderr
        # logging so test output stays clean.
        pass
427425
428- @threading_helper .reap_threads
429426 def testRead (self ):
430427 # Test that reading a weird robots.txt doesn't fail.
431428 addr = self .server .server_address
@@ -447,24 +444,62 @@ def testRead(self):
447444 self .assertFalse (parser .can_fetch (agent , url + '/%2F[spam]/path' ))
448445
449446
class HttpErrorsTestCase(BaseLocalNetworkTestCase, unittest.TestCase):
    """Check how RobotFileParser.read() maps HTTP status codes to policy.

    Per the parser's rules: 401/403 and 5xx responses for robots.txt mean
    "disallow everything", while other 4xx codes (404, 418, ...) mean
    "allow everything".
    """

    class RobotHandler(BaseHTTPRequestHandler):

        def do_GET(self):
            # Answer every request with whatever status code the current
            # test selected via ``self.server.return_code``.
            self.send_error(self.server.return_code)

        def log_message(self, format, *args):
            # Keep test output quiet.
            pass

    def setUp(self):
        # Make sure that a valid code is set in the test: send_error(None)
        # fails loudly if a test forgets to pick a status code.
        self.server.return_code = None

    def _fetch_policy(self, return_code):
        """Serve robots.txt with *return_code*; return (parser, base_url).

        Factors out the fetch sequence previously duplicated verbatim in
        all four test methods (and normalizes the URL construction, which
        testPasswordProtectedSite built with ``+`` while the others used
        f-strings).
        """
        self.server.return_code = return_code
        addr = self.server.server_address
        url = f'http://{socket_helper.HOST}:{addr[1]}'
        parser = urllib.robotparser.RobotFileParser()
        parser.set_url(url)
        parser.read()
        return parser, url

    def testPasswordProtectedSite(self):
        # 403 Forbidden -> assume the whole site is off limits.
        parser, url = self._fetch_policy(403)
        self.assertFalse(parser.can_fetch("*", url + "/robots.txt"))
        self.assertFalse(parser.can_fetch("*", url + '/some/file.html'))

    def testNotFound(self):
        # 404 Not Found -> no robots.txt, everything is allowed.
        parser, url = self._fetch_policy(404)
        self.assertTrue(parser.can_fetch("*", url + "/robots.txt"))
        self.assertTrue(parser.can_fetch("*", url + '/path/file.html'))

    def testTeapot(self):
        # 418 (an unhandled 4xx) is treated like 404: allow everything.
        parser, url = self._fetch_policy(418)
        self.assertTrue(parser.can_fetch("*", url + "/robots.txt"))
        self.assertTrue(parser.can_fetch("*", url + '/pot-1?milk-type=Cream'))

    def testServiceUnavailable(self):
        # 503 (5xx) -> be conservative and disallow everything.
        parser, url = self._fetch_policy(503)
        self.assertFalse(parser.can_fetch("*", url + "/robots.txt"))
        self.assertFalse(parser.can_fetch("*", url + '/path/file.html'))
468503
469504
470505@support .requires_working_socket ()