1515
1616def get_values (series , jurisdiction , year , documentType = 1 , summary = True ,
1717 dateIsRange = True , country = False , agency = None , cluster = None ,
18- label = None , industry = None , filtered = True ,
19- labellevel = 3 , industryLevel = None ,
18+ label = None , industry = None , labellevel = 3 , industryLevel = None ,
2019 labelsource = 'NAICS' , version = None ,
2120 download = False , page = None , date = None , verbose = 0 ):
2221 """
@@ -38,8 +37,6 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
3837 label (formerly industry) (optional):
3938 Industry code using the jurisdiction-specific
4039 coding system (returns all 3-digit industries by default)
41- filtered (optional): Exclude poorly-performing industry results
42- (use of unfiltered results is NOT recommended)
4340 labellevel (formerly industryLevel) (optional):
4441 Level of NAICS industries to include
4542 version (optional): Version ID for datasets with multiple versions
@@ -58,7 +55,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
5855 return
5956
6057 # If multiple jurisdiction names are given, find list of IDs
61- if type (jurisdiction ) == list and re .search (
58+ if isinstance (jurisdiction , list ) and re .search (
6259 r'[A-Za-z]' , str (jurisdiction [0 ])):
6360 jurisdiction = [list_jurisdictions ()[i ] for i in jurisdiction ]
6461 # If jurisdiction name is passed, find ID
@@ -91,12 +88,13 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
9188 get_datafinder (
9289 jurisdiction , documentType ).to_string (index = False ))
9390 except TypeError :
94- print ("Valid jurisdiction ID required. Consider the following:\n " )
91+ print ("Valid jurisdiction ID required. "
92+ "Consider the following:\n " )
9593 pp .pprint (list_jurisdictions ())
9694 return
9795
9896 # If multiple series are given, parses the list into a string
99- if type (series ) == list :
97+ if isinstance (series , list ) :
10098 url_call += f'series={ "," .join (str (i ) for i in series )} '
10199 elif type (series ) in [int , str ]:
102100 url_call += f'series={ series } '
@@ -108,7 +106,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
108106 return
109107
110108 # If multiple jurisdiction IDs are given, parses the list into a string
111- if type (jurisdiction ) == list :
109+ if isinstance (jurisdiction , list ) :
112110 url_call += f'&jurisdiction={ "," .join (str (i ) for i in jurisdiction )} '
113111 # If jurisdiction is just an ID, use jurisdiction
114112 elif type (jurisdiction ) in [int , str ]:
@@ -121,13 +119,13 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
121119 return
122120
123121 # If multiple agencies are given, parses the list into a string
124- if type (agency ) == list :
122+ if isinstance (agency , list ) :
125123 url_call += f'&agency={ "," .join (str (i ) for i in agency )} '
126124 elif agency :
127125 url_call += f'&agency={ agency } '
128126
129127 # If multiple clusters are given, parses the list into a string
130- if type (cluster ) == list :
128+ if isinstance (cluster , list ) :
131129 url_call += f'&cluster={ "," .join (str (i ) for i in cluster )} '
132130 elif cluster :
133131 url_call += f'&cluster={ cluster } '
@@ -140,7 +138,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
140138 print ('WARNING: industryLevel is deprecated; use labellevel' )
141139 labellevel = industryLevel
142140 # If multiple industries are given, parses the list into a string
143- if type (label ) == list :
141+ if isinstance (label , list ) :
144142 if labelsource == 'NAICS' :
145143 label = [list_industries (labellevel = labellevel ,
146144 labelsource = labelsource ,
@@ -157,13 +155,20 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
157155 url_call += f'&labelLevel={ labellevel } '
158156
159157 # If multiple years are given, parses the list into a string
160- if not summary and type (year ) == list :
158+ if not summary and isinstance (year , list ) :
161159 print (
162160 'WARNING: document-level data is only returnable for a single '
163161 'year at a time. Returning the first year requested.'
164162 )
165163 year = year [0 ]
166- if type (year ) == list :
164+ # Shows warning for returning document-level data before 2020
165+ if not summary and int (year ) <= 2019 :
166+ print (
167+ 'WARNING: The document_reference column for document-level data '
168+ 'for 2019 and before is not compatible with years 2020-2023. '
169+ 'These data will be compatible in version 6.0.'
170+ )
171+ if isinstance (year , list ):
167172 # If dateIsRange, parses the list to include all years
168173 if dateIsRange and len (year ) == 2 :
169174 year = range (int (year [0 ]), int (year [1 ]) + 1 )
@@ -188,13 +193,6 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
188193 'This query make take several minutes.' )
189194 url_call = url_call .replace ('/summary' , '/documents' )
190195
191- # Allows for unfiltered industry results to be retrieved. Includes
192- # warning message explaining that these results should not be trusted.
193- if label and not filtered :
194- print ('WARNING: Returning unfiltered industry results. '
195- 'Use of these results is NOT recommended.' )
196- url_call += '&filteredOnly=false'
197-
198196 # Adds documentType argument (default is 1 in API)
199197 if documentType :
200198 url_call += f'&documenttype={ documentType } '
@@ -242,7 +240,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
242240
243241 # If download path is given, write csv instead of returning dataframe
244242 if download :
245- if type (download ) == str :
243+ if isinstance (download , str ) :
246244 clean_columns (output ).to_csv (download , index = False )
247245 else :
248246 print ("Valid outpath required to download." )
@@ -257,8 +255,8 @@ def get_document_values(*args, **kwargs):
257255
258256 Simply returns get_values() with summary=False
259257 """
260- if type (kwargs ["year" ]) == list :
261- print_error ({"message" : "Only single year can be passed." })
258+ if isinstance (kwargs ["year" ], list ) :
259+ print_error ({"message" : "Only single year can be passed." })
262260 return
263261 return get_values (* args , ** kwargs , summary = False )
264262
@@ -306,9 +304,9 @@ def get_endpoint(series, jurisdiction, year, documentType, summary=True):
306304
307305 Returns the endpoint, e.g. '/state-summary' for summary-level state data
308306 """
309- if type (year ) == list :
307+ if isinstance (year , list ) :
310308 year = [int (y ) for y in year ]
311- if type (series ) == list :
309+ if isinstance (series , list ) :
312310 series = [int (s ) for s in series ]
313311 try :
314312 datafinder = get_datafinder (jurisdiction , documentType ).query (
@@ -334,9 +332,7 @@ def get_series(verbose=0):
334332
335333 Returns: pandas dataframe with the metadata
336334 """
337- url_call = series_url ()
338- if verbose :
339- print (f'API call: { url_call } ' )
335+ url_call = series_url (verbose )
340336 return clean_columns (json_normalize (
341337 json .loads (requests .get (url_call ).json ())))
342338
@@ -350,11 +346,9 @@ def get_agencies(jurisdictionID=None, keyword=None, verbose=0):
350346
351347 Returns: pandas dataframe with the metadata
352348 """
353- url_call = agency_url (jurisdictionID , keyword )
349+ url_call = agency_url (jurisdictionID , keyword , verbose )
354350 if not url_call :
355351 return
356- if verbose :
357- print (f'API call: { url_call } ' )
358352 return clean_columns (json_normalize (
359353 json .loads (requests .get (url_call ).json ())))
360354
@@ -368,9 +362,7 @@ def get_jurisdictions(verbose=0):
368362
369363 Returns: pandas dataframe with the metadata
370364 """
371- url_call = jurisdictions_url ()
372- if verbose :
373- print (f'API call: { url_call } ' )
365+ url_call = jurisdictions_url (verbose )
374366 return clean_columns (json_normalize (
375367 json .loads (requests .get (url_call ).json ())))
376368
@@ -387,9 +379,7 @@ def get_industries(keyword=None, labellevel=3, labelsource=None, verbose=0):
387379
388380 Returns: pandas dataframe with the metadata
389381 """
390- url_call = industries_url (keyword , labellevel , labelsource )
391- if verbose :
392- print (f'API call: { url_call } ' )
382+ url_call = industries_url (keyword , labellevel , labelsource , verbose )
393383 return clean_columns (json_normalize (
394384 json .loads (requests .get (url_call ).json ())))
395385
@@ -480,15 +470,15 @@ def list_document_types(jurisdictionID=None, reverse=False, verbose=0):
480470
481471
482472@Memoized
483- def list_series (reverse = False ):
473+ def list_series (reverse = False , verbose = 0 ):
484474 """
485475 Args:
486476 jurisdictionID: ID for the jurisdiction
487477 documentType (optional): ID for type of document
488478
489479 Returns: dictionary containing names of series and associated IDs
490480 """
491- url_call = series_url ()
481+ url_call = series_url (verbose )
492482 content = json .loads (requests .get (url_call ).json ())
493483 if reverse :
494484 return dict (sorted ({
@@ -514,18 +504,22 @@ def list_dates(jurisdictionID, documentType=None):
514504
515505
516506@Memoized
517- def list_agencies (jurisdictionID = None , keyword = None , reverse = False ):
507+ def list_agencies (jurisdictionID = None , keyword = None , reverse = False , verbose = 0 ):
518508 """
519509 Args:
520510 jurisdictionID: ID for the jurisdiction
521511 keyword: search for keyword in agency name
522512
523513 Returns: dictionary containing names of agencies and associated IDs
524514 """
525- url_call = agency_url (jurisdictionID , keyword )
526- if not url_call :
515+ # Removes duplicate agency names (uses only most recent)
516+ df = get_agencies (jurisdictionID , keyword , verbose )
517+ if isinstance (df , type (None )):
527518 return
528- content = json .loads (requests .get (url_call ).json ())
519+ df = df .sort_values (
520+ 'agency_id' , ascending = False ).drop_duplicates (
521+ 'agency_name' , keep = 'first' )
522+ content = json .loads (df .T .to_json ())
529523
530524 jurisdictions_df = get_jurisdictions ()
531525 jurisdiction_id_name = dict (zip (jurisdictions_df ["jurisdiction_id" ],
@@ -536,22 +530,28 @@ def list_agencies(jurisdictionID=None, keyword=None, reverse=False):
536530 if keyword :
537531 return dict (sorted ({
538532 a ["agency_id" ]:
539- f'{ a ["agency_name" ]} ({ jurisdiction_id_name [int (a ["a_jurisdiction_id" ])]} )'
540- for a in content if a ["agency_name" ]}.items ()))
533+ f'{ a ["agency_name" ]} '
534+ f'({ jurisdiction_id_name [int (a ["a_jurisdiction_id" ])]} )'
535+ for a in content .values ()
536+ if a ["agency_name" ]}.items ()))
541537 else :
542538 return dict (sorted ({
543539 a ["agency_id" ]: a ["agency_name" ]
544- for a in content if a ["agency_name" ]}.items ()))
540+ for a in content .values ()
541+ if a ["agency_name" ]}.items ()))
545542 else :
546543 if keyword :
547544 return dict (sorted ({
548- f'{ a ["agency_name" ]} ({ jurisdiction_id_name [int (a ["a_jurisdiction_id" ])]} )' :
545+ f'{ a ["agency_name" ]} '
546+ f'({ jurisdiction_id_name [int (a ["a_jurisdiction_id" ])]} )' :
549547 a ["agency_id" ]
550- for a in content if a ["agency_name" ]}.items ()))
548+ for a in content .values ()
549+ if a ["agency_name" ]}.items ()))
551550 else :
552551 return dict (sorted ({
553552 a ["agency_name" ]: a ["agency_id" ]
554- for a in content if a ["agency_name" ]}.items ()))
553+ for a in content .values ()
554+ if a ["agency_name" ]}.items ()))
555555
556556
557557@Memoized
@@ -630,14 +630,20 @@ def list_industries(
630630 i ["label_name" ]: i ["label_id" ] for i in content }.items ()))
631631
632632
def series_url(verbose=0):
    """Gets url call for dataseries endpoint.

    Args:
        verbose (optional): if truthy, prints the API call before
            returning it

    Returns: string with the base URL followed by '/dataseries'
    """
    url_call = URL + '/dataseries'
    if verbose:
        print(f'API call: {url_call}')
    # Return the same value that was printed rather than rebuilding it,
    # so the logged call and the returned call can never diverge.
    return url_call
636639
637640
638- def agency_url (jurisdictionID , keyword ):
641+ def agency_url (jurisdictionID , keyword , verbose = 0 ):
639642 """Gets url call for agencies endpoint."""
640- if keyword :
643+ if keyword and jurisdictionID :
644+ url_call = URL + (f'/agencies-keyword?'
645+ f'keyword={ keyword } &jurisdiction={ jurisdictionID } ' )
646+ elif keyword :
641647 url_call = URL + (f'/agencies-keyword?'
642648 f'keyword={ keyword } ' )
643649 elif jurisdictionID :
@@ -646,15 +652,20 @@ def agency_url(jurisdictionID, keyword):
646652 else :
647653 print ('Must include either "jurisdictionID" or "keyword."' )
648654 return
655+ if verbose :
656+ print (f'API call: { url_call } ' )
649657 return url_call
650658
651659
def jurisdictions_url(verbose=0):
    """Gets url call for jurisdictions endpoint.

    Args:
        verbose (optional): if truthy, prints the API call before
            returning it

    Returns: string with the base URL followed by '/jurisdictions/'
    """
    endpoint = URL + '/jurisdictions/'
    if verbose:
        print(f'API call: {endpoint}')
    return endpoint
655666
656667
657- def industries_url (keyword , labellevel , labelsource ):
668+ def industries_url (keyword , labellevel , labelsource , verbose = 0 ):
658669 """Gets url call for label (formerly industries) endpoint."""
659670 if keyword :
660671 url_call = (
@@ -664,6 +675,8 @@ def industries_url(keyword, labellevel, labelsource):
664675 url_call = URL + f'/labels?labellevel={ labellevel } '
665676 if labelsource :
666677 url_call += f'&labelsource={ labelsource } '
678+ if verbose :
679+ print (f'API call: { url_call } ' )
667680 return url_call
668681
669682
0 commit comments