Merge pull request #11 from QuantGov/dev

jnelson16 · web-flow · commit f8d8840faf37 · 2020-06-30T12:00:55.000-04:00
Version 0.2
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,8 @@
 .DS_Store
-
 *.egg-info
-
-__pycache__
-
+*__pycache__
 build/
-
-dist/
+dist/
+.coverage
+*tox*
+.python-version
diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+_The current version of RegCensusAPI is only compatible with Python 3.6 and newer._
+
 # RegCensus API
 
 ## Introduction
@@ -13,7 +15,7 @@ The RegCensus Python library is pip installable:
 $ pip install regcensus
 ```
 
-Once installed, import the library, using the following (using the `rc` alias to more easily use the library):
+Once installed, import the library as follows (the `rc` alias makes the library easier to use):
 
 ```
 import regcensus as rc
@@ -100,6 +102,8 @@ The __get_values__ function is the primary function for obtaining RegData from t
 * filtered (optional) - specify if poorly-performing industry results should be excluded. Default is True.
 * summary (optional) - specify if summary results should be returned, instead of document-level results. Default is True.
 * country (optional) - specify if all values for a country's jurisdiction ID should be returned. Default is False.
+* industryType (optional) - level of NAICS industries to include. Default is '3-Digit'.
+* download (optional) - path where a csv of the results should be saved. Default is False (no file is written).
 * verbose (optional) - value specifying how much debugging information should be printed for each function call. Higher number specifies more information, default is 0.
 
 In the example below, we are interested in the total number of restrictions and total number of words for the US (get_jurisdictions(38)) for the period 2010 to 2019.
@@ -108,6 +112,14 @@ In the example below, we are interested in the total number of restrictions and
 rc.get_values(series = [1,2], jurisdiction = 38, date = [2010, 2019])
 ```
 
+### Get all Values for a Country
+
+The `country` argument can be used to get all values for one or multiple series for a specific national jurisdiction. The following line will get you a summary of the national and state-level restriction counts for the United States from 2016 to 2019:
+
+```
+rc.get_values(series = 1, jurisdiction = 38, date = [2016, 2019], country=True)
+```
+
 ### Values by Subgroup
 
 You can obtain data for any of the three subgroups for each series - agencies, industries, and occupations (when they become available).
@@ -168,5 +180,18 @@ agency_restrictions_ind = agency_by_industry.merge(
     agencies, by='agency_id')
 ```
 
+## Downloading Data
+
+There are two different ways to download data retrieved from RegCensusAPI:
+
+1. Use the pandas `df.to_csv(outpath)` function, which writes the data to a csv file at the given outpath. See the pandas [documentation][3] for more features.
+
+2. As of version 0.2.0, the __get_values__ function includes a `download` argument, which allows the user to simply download a csv of the data in the same line as the API call. See below for an example of this call.
+
+```
+rc.get_values(series = [1,2], jurisdiction = 38, date = [2010, 2019], download='regdata2010to2019.csv')
+```
+
 [1]:https://api.quantgov.org/swagger-ui.html
 [2]:https://www.quantgov.org/download-interactively
+[3]:https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html
diff --git a/regcensus/api.py b/regcensus/api.py
@@ -12,7 +12,8 @@
 
 def get_values(series, jurisdiction, date, filtered=True, summary=True,
                documentType=3, agency=None, industry=None, dateIsRange=True,
-               country=False, industryType='3-Digit', verbose=0):
+               country=False, industryType='3-Digit',
+               download=False, verbose=0):
     """
     Get values for a specific jurisdition and series
 
@@ -23,12 +24,17 @@ def get_values(series, jurisdiction, date, filtered=True, summary=True,
         summary (optional): Return summary instead of document level data
         filtered (optional): Exclude poorly-performing industry results
         documentType (optional): ID for type of document
-        agency (optional): Agency ID
+        agency (optional): Agency ID (use 'all' for all agencies,
+            only works for a single jurisdiction)
         industry (optional): Industry code using the jurisdiction-specific
             coding system (use 'all' for all industries)
         dateIsRange (optional): Indicating whether the time parameter is range
             or should be treated as single data points
         country (optional): Get all values for country ID
+        industryType (optional): Level of NAICS industries to include,
+            default is '3-Digit'
+        download (optional): If not False, a path location for a
+            downloaded csv of the results
         verbose (optional): Print out the url of the API call
 
     Returns: pandas dataframe with the values and various metadata
@@ -60,6 +66,9 @@ def get_values(series, jurisdiction, date, filtered=True, summary=True,
         pp.pprint(list_jurisdictions())
         return
 
+    # Allows for all agency data to be returned
+    if str(agency).lower() == 'all':
+        agency = list(list_agencies(jurisdiction).values())
     # If multiple agencies are given, parses the list into a string
     if type(agency) == list:
         url_call += f'&agency={",".join(str(i) for i in agency)}'
@@ -127,10 +136,16 @@ def get_values(series, jurisdiction, date, filtered=True, summary=True,
         print(f'API call: {url_call}')
 
     # Puts flattened JSON output into a pandas DataFrame
-    output = pd.io.json.json_normalize(requests.get(url_call).json())
+    output = json_normalize(requests.get(url_call).json())
     # Prints error message if call fails
     if (output.columns[:3] == ['title', 'status', 'detail']).all():
         print('WARNING:', output.iloc[0][-1])
+        return
+    elif download:
+        if type(download) == str:
+            clean_columns(output).to_csv(download, index=False)
+        else:
+            print("Valid outpath required to download.")
     # Returns clean data if no error
     else:
         return clean_columns(output)
@@ -144,21 +159,23 @@ def get_series(seriesID=''):
 
     Returns: pandas dataframe with the metadata
     """
-    output = pd.io.json.json_normalize(
+    output = json_normalize(
         requests.get(URL + f'/series/{seriesID}').json())
     return clean_columns(output)
 
 
-def get_agencies(agencyID=''):
+def get_agencies(jurisdictionID):
     """
-    Get metadata for all or one specific agency
+    Get metadata for all agencies of a specific jurisdiction
 
-    Args: agencyID (optional): ID for the agency
+    Args: jurisdictionID: ID for the jurisdiction
 
     Returns: pandas dataframe with the metadata
     """
-    output = pd.io.json.json_normalize(
-        requests.get(URL + f'/agencies/{agencyID}').json())
+    output = json_normalize(
+        requests.get(
+            URL + (f'/agencies/jurisdiction?'
+                   f'jurisdictions={jurisdictionID}')).json())
     return clean_columns(output)
 
 
@@ -170,7 +187,7 @@ def get_jurisdictions(jurisdictionID=''):
 
     Returns: pandas dataframe with the metadata
     """
-    output = pd.io.json.json_normalize(
+    output = json_normalize(
         requests.get(URL + f'/jurisdictions/{jurisdictionID}').json())
     return clean_columns(output)
 
@@ -185,12 +202,12 @@ def get_periods(jurisdictionID='', documentType=3):
     Returns: pandas dataframe with the dates
     """
     if jurisdictionID:
-        output = pd.io.json.json_normalize(
+        output = json_normalize(
             requests.get(
                 URL + (f'/periods?jurisdiction={jurisdictionID}&'
                        f'documentType={documentType}')).json())
     else:
-        output = pd.io.json.json_normalize(
+        output = json_normalize(
             requests.get(URL + f'/periods/available').json())
     return clean_columns(output)
 
@@ -203,9 +220,9 @@ def get_industries(jurisdictionID):
 
     Returns: pandas dataframe with the metadata
     """
-    output = pd.io.json.json_normalize(
-            requests.get(
-                URL + f'/industries?jurisdiction={jurisdictionID}').json())
+    output = json_normalize(
+        requests.get(
+            URL + f'/industries?jurisdiction={jurisdictionID}').json())
     return clean_columns(output)
 
 
@@ -220,11 +237,11 @@ def get_documents(jurisdictionID, documentType=3):
 
     Returns: pandas dataframe with the metadata
     """
-    output = pd.io.json.json_normalize(
+    output = json_normalize(
         requests.get(
             URL + (f'/documents?jurisdiction={jurisdictionID}&'
                    f'documentType={documentType}')
-            ).json())
+        ).json())
     return clean_columns(output)
 
 
@@ -246,11 +263,14 @@ def list_series():
     return dict(sorted({s["seriesName"]: s["seriesID"] for s in json}.items()))
 
 
-def list_agencies():
+def list_agencies(jurisdictionID):
     """
+    Args: jurisdictionID: ID for the jurisdiction
+
     Returns: dictionary containing names of agencies and associated IDs
     """
-    json = requests.get(URL + '/agencies').json()
+    json = requests.get(
+        URL + f'/agencies/jurisdiction?jurisdictions={jurisdictionID}').json()
     return dict(sorted({
         a["agencyName"]: a["agencyID"]
         for a in json if a["agencyName"]}.items()))
@@ -281,3 +301,11 @@ def clean_columns(df):
     """Removes JSON prefixes from column names"""
     df.columns = [c.split('.')[-1] for c in df.columns]
     return df
+
+
+def json_normalize(output):
+    """Backwards compatibility for old versions of pandas"""
+    try:
+        return pd.json_normalize(output)
+    except AttributeError:  # older pandas lacks top-level json_normalize
+        return pd.io.json.json_normalize(output)
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,5 @@
+[tool:pytest]
+addopts = --flake8 --cov
+flake8-ignore =
+    *.py F541 W503 W504
+    tests/* F401
diff --git a/setup.py b/setup.py
@@ -3,14 +3,14 @@
 
 
 setup(
-   name='regcensus',
-   version='0.1.4',
-   description='Python package for accessing data from the QuantGov API',
-   url='https://github.com/QuantGov/regcensus-api-python',
-   author='QuantGov',
-   author_email='quantgov.info@gmail.com',
-   packages=setuptools.find_packages(),
-   classifiers=[
+    name='regcensus',
+    version='0.2.0',
+    description='Python package for accessing data from the QuantGov API',
+    url='https://github.com/QuantGov/regcensus-api-python',
+    author='QuantGov',
+    author_email='quantgov.info@gmail.com',
+    packages=setuptools.find_packages(),
+    classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
diff --git a/tests/test_api.py b/tests/test_api.py