Skip to content

Commit a7e8af7

Browse files
authored
Merge pull request #35 from QuantGov/dev
1.1.0
2 parents 71aeecc + 2ea6361 commit a7e8af7

File tree

3 files changed

+98
-84
lines changed

3 files changed

+98
-84
lines changed

regcensus/api.py

Lines changed: 68 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515

1616
def get_values(series, jurisdiction, year, documentType=1, summary=True,
1717
dateIsRange=True, country=False, agency=None, cluster=None,
18-
label=None, industry=None, filtered=True,
19-
labellevel=3, industryLevel=None,
18+
label=None, industry=None, labellevel=3, industryLevel=None,
2019
labelsource='NAICS', version=None,
2120
download=False, page=None, date=None, verbose=0):
2221
"""
@@ -38,8 +37,6 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
3837
label (formerly industry) (optional):
3938
Industry code using the jurisdiction-specific
4039
coding system (returns all 3-digit industries by default)
41-
filtered (optional): Exclude poorly-performing industry results
42-
(use of unfiltered results is NOT recommended)
4340
labellevel (formerly industryLevel) (optional):
4441
Level of NAICS industries to include
4542
version (optional): Version ID for datasets with multiple versions
@@ -58,7 +55,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
5855
return
5956

6057
# If multiple jurisdiction names are given, find list of IDs
61-
if type(jurisdiction) == list and re.search(
58+
if isinstance(jurisdiction, list) and re.search(
6259
r'[A-Za-z]', str(jurisdiction[0])):
6360
jurisdiction = [list_jurisdictions()[i] for i in jurisdiction]
6461
# If jurisdiction name is passed, find ID
@@ -91,12 +88,13 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
9188
get_datafinder(
9289
jurisdiction, documentType).to_string(index=False))
9390
except TypeError:
94-
print("Valid jurisdiction ID required. Consider the following:\n")
91+
print("Valid jurisdiction ID required. "
92+
"Consider the following:\n")
9593
pp.pprint(list_jurisdictions())
9694
return
9795

9896
# If multiple series are given, parses the list into a string
99-
if type(series) == list:
97+
if isinstance(series, list):
10098
url_call += f'series={",".join(str(i) for i in series)}'
10199
elif type(series) in [int, str]:
102100
url_call += f'series={series}'
@@ -108,7 +106,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
108106
return
109107

110108
# If multiple jurisdiction IDs are given, parses the list into a string
111-
if type(jurisdiction) == list:
109+
if isinstance(jurisdiction, list):
112110
url_call += f'&jurisdiction={",".join(str(i) for i in jurisdiction)}'
113111
# If jurisdiction is just an ID, use jurisdiction
114112
elif type(jurisdiction) in [int, str]:
@@ -121,13 +119,13 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
121119
return
122120

123121
# If multiple agencies are given, parses the list into a string
124-
if type(agency) == list:
122+
if isinstance(agency, list):
125123
url_call += f'&agency={",".join(str(i) for i in agency)}'
126124
elif agency:
127125
url_call += f'&agency={agency}'
128126

129127
# If multiple clusters are given, parses the list into a string
130-
if type(cluster) == list:
128+
if isinstance(cluster, list):
131129
url_call += f'&cluster={",".join(str(i) for i in cluster)}'
132130
elif cluster:
133131
url_call += f'&cluster={cluster}'
@@ -140,7 +138,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
140138
print('WARNING: industryLevel is deprecated; use labellevel')
141139
labellevel = industryLevel
142140
# If multiple industries are given, parses the list into a string
143-
if type(label) == list:
141+
if isinstance(label, list):
144142
if labelsource == 'NAICS':
145143
label = [list_industries(labellevel=labellevel,
146144
labelsource=labelsource,
@@ -157,13 +155,20 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
157155
url_call += f'&labelLevel={labellevel}'
158156

159157
# If multiple years are given, parses the list into a string
160-
if not summary and type(year) == list:
158+
if not summary and isinstance(year, list):
161159
print(
162160
'WARNING: document-level data is only returnable for a single '
163161
'year at a time. Returning the first year requested.'
164162
)
165163
year = year[0]
166-
if type(year) == list:
164+
# Shows warning for returning document-level data before 2020
165+
if not summary and int(year) <= 2019:
166+
print(
167+
'WARNING: The document_reference column for document-level data '
168+
'for 2019 and before is not compatible with years 2020-2023. '
169+
'These data will be compatible in version 6.0.'
170+
)
171+
if isinstance(year, list):
167172
# If dateIsRange, parses the list to include all years
168173
if dateIsRange and len(year) == 2:
169174
year = range(int(year[0]), int(year[1]) + 1)
@@ -188,13 +193,6 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
188193
'This query make take several minutes.')
189194
url_call = url_call.replace('/summary', '/documents')
190195

191-
# Allows for unfiltered industry results to be retrieved. Includes
192-
# warning message explaining that these results should not be trusted.
193-
if label and not filtered:
194-
print('WARNING: Returning unfiltered industry results. '
195-
'Use of these results is NOT recommended.')
196-
url_call += '&filteredOnly=false'
197-
198196
# Adds documentType argument (default is 1 in API)
199197
if documentType:
200198
url_call += f'&documenttype={documentType}'
@@ -242,7 +240,7 @@ def get_values(series, jurisdiction, year, documentType=1, summary=True,
242240

243241
# If download path is given, write csv instead of returning dataframe
244242
if download:
245-
if type(download) == str:
243+
if isinstance(download, str):
246244
clean_columns(output).to_csv(download, index=False)
247245
else:
248246
print("Valid outpath required to download.")
@@ -257,8 +255,8 @@ def get_document_values(*args, **kwargs):
257255
258256
Simply returns get_values() with summary=False
259257
"""
260-
if type(kwargs["year"]) == list:
261-
print_error({"message" : "Only single year can be passed."})
258+
if isinstance(kwargs["year"], list):
259+
print_error({"message": "Only single year can be passed."})
262260
return
263261
return get_values(*args, **kwargs, summary=False)
264262

@@ -306,9 +304,9 @@ def get_endpoint(series, jurisdiction, year, documentType, summary=True):
306304
307305
Returns the endpoint, e.g. '/state-summary' for summary-level state data
308306
"""
309-
if type(year) == list:
307+
if isinstance(year, list):
310308
year = [int(y) for y in year]
311-
if type(series) == list:
309+
if isinstance(series, list):
312310
series = [int(s) for s in series]
313311
try:
314312
datafinder = get_datafinder(jurisdiction, documentType).query(
@@ -334,9 +332,7 @@ def get_series(verbose=0):
334332
335333
Returns: pandas dataframe with the metadata
336334
"""
337-
url_call = series_url()
338-
if verbose:
339-
print(f'API call: {url_call}')
335+
url_call = series_url(verbose)
340336
return clean_columns(json_normalize(
341337
json.loads(requests.get(url_call).json())))
342338

@@ -350,11 +346,9 @@ def get_agencies(jurisdictionID=None, keyword=None, verbose=0):
350346
351347
Returns: pandas dataframe with the metadata
352348
"""
353-
url_call = agency_url(jurisdictionID, keyword)
349+
url_call = agency_url(jurisdictionID, keyword, verbose)
354350
if not url_call:
355351
return
356-
if verbose:
357-
print(f'API call: {url_call}')
358352
return clean_columns(json_normalize(
359353
json.loads(requests.get(url_call).json())))
360354

@@ -368,9 +362,7 @@ def get_jurisdictions(verbose=0):
368362
369363
Returns: pandas dataframe with the metadata
370364
"""
371-
url_call = jurisdictions_url()
372-
if verbose:
373-
print(f'API call: {url_call}')
365+
url_call = jurisdictions_url(verbose)
374366
return clean_columns(json_normalize(
375367
json.loads(requests.get(url_call).json())))
376368

@@ -387,9 +379,7 @@ def get_industries(keyword=None, labellevel=3, labelsource=None, verbose=0):
387379
388380
Returns: pandas dataframe with the metadata
389381
"""
390-
url_call = industries_url(keyword, labellevel, labelsource)
391-
if verbose:
392-
print(f'API call: {url_call}')
382+
url_call = industries_url(keyword, labellevel, labelsource, verbose)
393383
return clean_columns(json_normalize(
394384
json.loads(requests.get(url_call).json())))
395385

@@ -480,15 +470,15 @@ def list_document_types(jurisdictionID=None, reverse=False, verbose=0):
480470

481471

482472
@Memoized
483-
def list_series(reverse=False):
473+
def list_series(reverse=False, verbose=0):
484474
"""
485475
Args:
486476
jurisdictionID: ID for the jurisdiction
487477
documentType (optional): ID for type of document
488478
489479
Returns: dictionary containing names of series and associated IDs
490480
"""
491-
url_call = series_url()
481+
url_call = series_url(verbose)
492482
content = json.loads(requests.get(url_call).json())
493483
if reverse:
494484
return dict(sorted({
@@ -514,18 +504,22 @@ def list_dates(jurisdictionID, documentType=None):
514504

515505

516506
@Memoized
517-
def list_agencies(jurisdictionID=None, keyword=None, reverse=False):
507+
def list_agencies(jurisdictionID=None, keyword=None, reverse=False, verbose=0):
518508
"""
519509
Args:
520510
jurisdictionID: ID for the jurisdiction
521511
keyword: search for keyword in agency name
522512
523513
Returns: dictionary containing names of agencies and associated IDs
524514
"""
525-
url_call = agency_url(jurisdictionID, keyword)
526-
if not url_call:
515+
# Removes duplicate agency names (uses only most recent)
516+
df = get_agencies(jurisdictionID, keyword, verbose)
517+
if isinstance(df, type(None)):
527518
return
528-
content = json.loads(requests.get(url_call).json())
519+
df = df.sort_values(
520+
'agency_id', ascending=False).drop_duplicates(
521+
'agency_name', keep='first')
522+
content = json.loads(df.T.to_json())
529523

530524
jurisdictions_df = get_jurisdictions()
531525
jurisdiction_id_name = dict(zip(jurisdictions_df["jurisdiction_id"],
@@ -536,22 +530,28 @@ def list_agencies(jurisdictionID=None, keyword=None, reverse=False):
536530
if keyword:
537531
return dict(sorted({
538532
a["agency_id"]:
539-
f'{a["agency_name"]} ({jurisdiction_id_name[int(a["a_jurisdiction_id"])]})'
540-
for a in content if a["agency_name"]}.items()))
533+
f'{a["agency_name"]} '
534+
f'({jurisdiction_id_name[int(a["a_jurisdiction_id"])]})'
535+
for a in content.values()
536+
if a["agency_name"]}.items()))
541537
else:
542538
return dict(sorted({
543539
a["agency_id"]: a["agency_name"]
544-
for a in content if a["agency_name"]}.items()))
540+
for a in content.values()
541+
if a["agency_name"]}.items()))
545542
else:
546543
if keyword:
547544
return dict(sorted({
548-
f'{a["agency_name"]} ({jurisdiction_id_name[int(a["a_jurisdiction_id"])]})':
545+
f'{a["agency_name"]} '
546+
f'({jurisdiction_id_name[int(a["a_jurisdiction_id"])]})':
549547
a["agency_id"]
550-
for a in content if a["agency_name"]}.items()))
548+
for a in content.values()
549+
if a["agency_name"]}.items()))
551550
else:
552551
return dict(sorted({
553552
a["agency_name"]: a["agency_id"]
554-
for a in content if a["agency_name"]}.items()))
553+
for a in content.values()
554+
if a["agency_name"]}.items()))
555555

556556

557557
@Memoized
@@ -630,14 +630,20 @@ def list_industries(
630630
i["label_name"]: i["label_id"] for i in content}.items()))
631631

632632

633-
def series_url():
633+
def series_url(verbose=0):
634634
"""Gets url call for dataseries endpoint."""
635+
url_call = URL + '/dataseries'
636+
if verbose:
637+
print(f'API call: {url_call}')
635638
return URL + '/dataseries'
636639

637640

638-
def agency_url(jurisdictionID, keyword):
641+
def agency_url(jurisdictionID, keyword, verbose=0):
639642
"""Gets url call for agencies endpoint."""
640-
if keyword:
643+
if keyword and jurisdictionID:
644+
url_call = URL + (f'/agencies-keyword?'
645+
f'keyword={keyword}&jurisdiction={jurisdictionID}')
646+
elif keyword:
641647
url_call = URL + (f'/agencies-keyword?'
642648
f'keyword={keyword}')
643649
elif jurisdictionID:
@@ -646,15 +652,20 @@ def agency_url(jurisdictionID, keyword):
646652
else:
647653
print('Must include either "jurisdictionID" or "keyword."')
648654
return
655+
if verbose:
656+
print(f'API call: {url_call}')
649657
return url_call
650658

651659

652-
def jurisdictions_url():
660+
def jurisdictions_url(verbose=0):
653661
"""Gets url call for jurisdictions endpoint."""
654-
return URL + '/jurisdictions/'
662+
url_call = URL + '/jurisdictions/'
663+
if verbose:
664+
print(f'API call: {url_call}')
665+
return url_call
655666

656667

657-
def industries_url(keyword, labellevel, labelsource):
668+
def industries_url(keyword, labellevel, labelsource, verbose=0):
658669
"""Gets url call for label (formerly industries) endpoint."""
659670
if keyword:
660671
url_call = (
@@ -664,6 +675,8 @@ def industries_url(keyword, labellevel, labelsource):
664675
url_call = URL + f'/labels?labellevel={labellevel}'
665676
if labelsource:
666677
url_call += f'&labelsource={labelsource}'
678+
if verbose:
679+
print(f'API call: {url_call}')
667680
return url_call
668681

669682

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name='regcensus',
7-
version='1.0.1',
7+
version='1.1.0',
88
description='Python package for accessing data from the QuantGov API',
99
url='https://github.com/QuantGov/regcensus-api-python',
1010
author='QuantGov',

0 commit comments

Comments
 (0)