Skip to content
This repository was archived by the owner on Oct 21, 2022. It is now read-only.

Commit 8a69da1

Browse files
authored
Merge pull request #183 from ylil93/issue175
use a strict whitelist for sanitizing packages
2 parents 7d40063 + 03fee77 commit 8a69da1

4 files changed

Lines changed: 365 additions & 23 deletions

File tree

compatibility_lib/compatibility_lib/configs.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@
1515
"""Common configs for compatibility_lib."""
1616

1717

18+
def _format_url(repo_name, setuppy_path=''):
    """Build the percent-encoded git URL string for a GitHub repository.

    Args:
        repo_name: GitHub repository in 'owner/name' form; it is inserted
            verbatim into the URL.
        setuppy_path: optional path inside the repository, appended as a
            'subdirectory=' suffix (presumably the directory holding
            setup.py -- confirm). An empty string or None means no suffix.

    Returns:
        A string such as 'git%2Bgit://github.com/owner/name.git', followed
        by '%23subdirectory=<setuppy_path>' when a path is given.
    """
    # NOTE(review): '+' and '#' are emitted percent-encoded (%2B, %23). A
    # lookup with an already URL-decoded string will not match these values;
    # confirm that callers compare against the encoded form.
    url = 'git%2Bgit://github.com/{}.git'.format(repo_name)
    # Truthiness check so that None is treated like '' instead of producing
    # a literal 'subdirectory=None' suffix.
    if setuppy_path:
        url = '{}%23subdirectory={}'.format(url, setuppy_path)
    return url
23+
24+
1825
# IGNORED_DEPENDENCIES are not direct dependencies for many packages and are
1926
# not installed via pip, resulting in unresolvable high priority warnings.
2027
IGNORED_DEPENDENCIES = [
@@ -64,6 +71,95 @@
6471
'gcloud',
6572
]
6673

74+
WHITELIST_PKGS = PKG_LIST
75+
76+
WHITELIST_URLS = {
77+
_format_url('googleapis/google-cloud-python', 'asset'): 'gcloud',
78+
_format_url('googleapis/google-cloud-python', 'automl'): 'gcloud',
79+
_format_url('googleapis/google-cloud-python', 'dataproc'): 'gcloud',
80+
_format_url('googleapis/google-cloud-python', 'dlp'): 'gcloud',
81+
_format_url('googleapis/google-cloud-python', 'iot'): 'gcloud',
82+
_format_url('googleapis/google-cloud-python', 'kms'): 'gcloud',
83+
_format_url('googleapis/google-cloud-python', 'legacy/google-cloud'):
84+
'gcloud',
85+
_format_url('googleapis/google-cloud-python', 'ndb'): 'gcloud',
86+
_format_url('googleapis/google-cloud-python', 'oslogin'): 'gcloud',
87+
_format_url('googleapis/google-cloud-python', 'redis'): 'gcloud',
88+
_format_url('googleapis/google-cloud-python', 'securitycenter'): 'gcloud',
89+
_format_url('googleapis/google-cloud-python', 'tasks'): 'gcloud',
90+
_format_url('googleapis/google-cloud-python', 'test_utils'): 'gcloud',
91+
_format_url('googleapis/google-cloud-python', 'texttospeech'): 'gcloud',
92+
_format_url('googleapis/google-cloud-python', 'websecurityscanner'):
93+
'gcloud',
94+
_format_url('googleapis/google-cloud-python', 'api_core'):
95+
'google-api-core',
96+
_format_url('googleapis/google-cloud-python', 'bigquery'):
97+
'google-cloud-bigquery',
98+
_format_url('googleapis/google-cloud-python', 'bigquery_datatransfer'):
99+
'google-cloud-bigquery-datatransfer',
100+
_format_url('googleapis/google-cloud-python', 'bigtable'):
101+
'google-cloud-bigtable',
102+
_format_url('googleapis/google-cloud-python', 'container'):
103+
'google-cloud-container',
104+
_format_url('googleapis/google-cloud-python', 'core'):
105+
'google-cloud-core',
106+
_format_url('googleapis/google-cloud-python', 'datastore'):
107+
'google-cloud-datastore',
108+
_format_url('googleapis/google-cloud-python', 'dns'): 'google-cloud-dns',
109+
_format_url('googleapis/google-cloud-python', 'error_reporting'):
110+
'google-cloud-error-reporting',
111+
_format_url('googleapis/google-cloud-python', 'firestore'):
112+
'google-cloud-firestore',
113+
_format_url('googleapis/google-cloud-python', 'language'):
114+
'google-cloud-language',
115+
_format_url('googleapis/google-cloud-python', 'logging'):
116+
'google-cloud-logging',
117+
_format_url('googleapis/google-cloud-python', 'monitoring'):
118+
'google-cloud-monitoring',
119+
_format_url('googleapis/google-cloud-python', 'pubsub'):
120+
'google-cloud-pubsub',
121+
_format_url('googleapis/google-cloud-python', 'resource_manager'):
122+
'google-cloud-resource-manager',
123+
_format_url('googleapis/google-cloud-python', 'runtimeconfig'):
124+
'google-cloud-runtimeconfig',
125+
_format_url('googleapis/google-cloud-python', 'spanner'):
126+
'google-cloud-spanner',
127+
_format_url('googleapis/google-cloud-python', 'speech'):
128+
'google-cloud-speech',
129+
_format_url('googleapis/google-cloud-python', 'storage'):
130+
'google-cloud-storage',
131+
_format_url('googleapis/google-cloud-python', 'trace'):
132+
'google-cloud-trace',
133+
_format_url('googleapis/google-cloud-python', 'translate'):
134+
'google-cloud-translate',
135+
_format_url('googleapis/google-cloud-python', 'videointelligence'):
136+
'google-cloud-videointelligence',
137+
_format_url('googleapis/google-cloud-python', 'vision'):
138+
'google-cloud-vision',
139+
_format_url('googleapis/google-api-python-client'):
140+
'google-api-python-client',
141+
_format_url('googleapis/google-auth-library-python'): 'google-auth',
142+
_format_url('GoogleCloudPlatform/google-resumable-media-python'):
143+
'google-resumable-media',
144+
_format_url('apache/beam', 'sdks/python'): 'apache-beam[gcp]',
145+
_format_url('google/apitools'): 'google-apitools',
146+
_format_url('GoogleCloudPlatform/gsutil'): 'gsutil',
147+
_format_url('census-instrumentation/opencensus-python'): 'opencensus',
148+
_format_url('protocolbuffers/protobuf', 'python'): 'protobuf',
149+
_format_url('google/protorpc'): 'protorpc',
150+
_format_url('tensorflow/tensorflow', 'tensorflow/tools/pip_package'):
151+
'tensorflow',
152+
_format_url('tensorflow/tensorflow',
153+
'tensorflow/contrib/tpu/profiler/pip_package'): 'tensorflow',
154+
# TODO: The following projects do not use setup.py
155+
# googleapis-common-protos
156+
# grpc-google-iam-v1
157+
# grpcio
158+
# tensorboard - not sure what the build process is
159+
# _format_url('tensorflow/tensorboard', 'tensorboard/pip_package'):
160+
# 'tensorboard',
161+
}
162+
67163
# TODO: Find top 30 packages by download count in BigQuery table.
68164
THIRD_PARTY_PACKAGE_LIST = [
69165
'requests',
@@ -73,5 +169,5 @@
73169

74170
PKG_PY_VERSION_NOT_SUPPORTED = {
75171
2: ['tensorflow', ],
76-
3: ['google-cloud-dataflow', ],
172+
3: ['apache-beam[gcp]', 'gsutil', ],
77173
}

compatibility_server/compatibility_checker_server.py

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
import argparse
4141
import collections.abc
42+
import configs
4243
import json
4344
import logging
4445
import pprint
@@ -49,12 +50,6 @@
4950

5051
import pip_checker
5152

52-
# White list Google owned Python packages
53-
GITHUB_PREFIX = 'github.com/'
54-
WHITELIST_GITHUB_REPO = ['GoogleCloudPlatform/',
55-
'google/',
56-
'googleapis/']
57-
5853

5954
def _parse_python_version_to_interpreter_mapping(s):
6055
version_to_interpreter = {}
@@ -102,7 +97,7 @@ def _check(self, start_response, python_version, packages):
10297
[('Content-Type', 'text/plain; charset=utf-8')])
10398
return [b'Request must specify at least one package']
10499

105-
sanitized_packages = self._sanitize_packages(packages)
100+
sanitized_packages = _sanitize_packages(packages)
106101

107102
if sanitized_packages != packages:
108103
start_response('400 Bad Request',
@@ -152,21 +147,6 @@ def _check(self, start_response, python_version, packages):
152147
start_response('200 OK', [('Content-Type', 'application/json')])
153148
return [json.dumps(results).encode('utf-8')]
154149

155-
def _sanitize_packages(self, packages):
156-
# If checking github head version, only run checks for whitelisted
157-
# repos.
158-
sanitized_packages = []
159-
for pkg in packages:
160-
if GITHUB_PREFIX in pkg:
161-
for whitelist_repo in WHITELIST_GITHUB_REPO:
162-
github_whitelist = GITHUB_PREFIX + whitelist_repo
163-
if github_whitelist in pkg:
164-
sanitized_packages.append(pkg)
165-
else:
166-
sanitized_packages.append(pkg)
167-
168-
return sanitized_packages
169-
170150
def _wsgi_app(self, environ, start_response):
171151
if environ.get('REQUEST_METHOD') == 'GET':
172152
parameters = urllib.parse.parse_qs(environ.get('QUERY_STRING', ''))
@@ -206,6 +186,21 @@ def serve(self):
206186
self._httpd.serve_forever()
207187

208188

189+
def _sanitize_packages(packages):
    """Filter a package list down to its whitelisted entries.

    An entry is kept only when it is a member of configs.WHITELIST_PKGS or
    of configs.WHITELIST_URLS. Membership is an exact match; no prefix or
    substring matching is performed.

    Args:
        packages: a list of package names or package URLs.

    Returns:
        A new list holding the whitelisted entries of packages, in their
        original order.
    """
    def _is_whitelisted(candidate):
        # Same two membership tests as before, evaluated per entry.
        return (candidate in configs.WHITELIST_PKGS
                or candidate in configs.WHITELIST_URLS)

    return [entry for entry in packages if _is_whitelisted(entry)]
202+
203+
209204
def main():
210205
logging.basicConfig(
211206
level=logging.INFO,

compatibility_server/configs.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
# Copyright 2018 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Common configs for compatibility_lib."""
16+
17+
18+
def _format_url(repo_name, setuppy_path=''):
    """Build the percent-encoded git URL string for a GitHub repository.

    Args:
        repo_name: GitHub repository in 'owner/name' form; it is inserted
            verbatim into the URL.
        setuppy_path: optional path inside the repository, appended as a
            'subdirectory=' suffix (presumably the directory holding
            setup.py -- confirm). An empty string or None means no suffix.

    Returns:
        A string such as 'git%2Bgit://github.com/owner/name.git', followed
        by '%23subdirectory=<setuppy_path>' when a path is given.
    """
    # NOTE(review): '+' and '#' are emitted percent-encoded (%2B, %23). A
    # lookup with an already URL-decoded string will not match these values;
    # confirm that callers compare against the encoded form.
    url = 'git%2Bgit://github.com/{}.git'.format(repo_name)
    # Truthiness check so that None is treated like '' instead of producing
    # a literal 'subdirectory=None' suffix.
    if setuppy_path:
        url = '{}%23subdirectory={}'.format(url, setuppy_path)
    return url
23+
24+
25+
# IGNORED_DEPENDENCIES are not direct dependencies for many packages and are
26+
# not installed via pip, resulting in unresolvable high priority warnings.
27+
IGNORED_DEPENDENCIES = [
28+
'pip',
29+
'setuptools',
30+
'wheel',
31+
]
32+
33+
PKG_LIST = [
34+
'google-api-core',
35+
'google-api-python-client',
36+
'google-auth',
37+
'google-cloud-bigquery',
38+
'google-cloud-bigquery-datatransfer',
39+
'google-cloud-bigtable',
40+
'google-cloud-container',
41+
'google-cloud-core',
42+
'google-cloud-datastore',
43+
'google-cloud-dns',
44+
'google-cloud-error-reporting',
45+
'google-cloud-firestore',
46+
'google-cloud-language',
47+
'google-cloud-logging',
48+
'google-cloud-monitoring',
49+
'google-cloud-pubsub',
50+
'google-cloud-resource-manager',
51+
'google-cloud-runtimeconfig',
52+
'google-cloud-spanner',
53+
'google-cloud-speech',
54+
'google-cloud-storage',
55+
'google-cloud-trace',
56+
'google-cloud-translate',
57+
'google-cloud-videointelligence',
58+
'google-cloud-vision',
59+
'google-resumable-media',
60+
'apache-beam[gcp]',
61+
'google-apitools',
62+
'googleapis-common-protos',
63+
'grpc-google-iam-v1',
64+
'grpcio',
65+
'gsutil',
66+
'opencensus',
67+
'protobuf',
68+
'protorpc',
69+
'tensorboard',
70+
'tensorflow',
71+
'gcloud',
72+
]
73+
74+
WHITELIST_PKGS = PKG_LIST
75+
76+
WHITELIST_URLS = {
77+
_format_url('googleapis/google-cloud-python', 'asset'): 'gcloud',
78+
_format_url('googleapis/google-cloud-python', 'automl'): 'gcloud',
79+
_format_url('googleapis/google-cloud-python', 'dataproc'): 'gcloud',
80+
_format_url('googleapis/google-cloud-python', 'dlp'): 'gcloud',
81+
_format_url('googleapis/google-cloud-python', 'iot'): 'gcloud',
82+
_format_url('googleapis/google-cloud-python', 'kms'): 'gcloud',
83+
_format_url('googleapis/google-cloud-python', 'legacy/google-cloud'):
84+
'gcloud',
85+
_format_url('googleapis/google-cloud-python', 'ndb'): 'gcloud',
86+
_format_url('googleapis/google-cloud-python', 'oslogin'): 'gcloud',
87+
_format_url('googleapis/google-cloud-python', 'redis'): 'gcloud',
88+
_format_url('googleapis/google-cloud-python', 'securitycenter'): 'gcloud',
89+
_format_url('googleapis/google-cloud-python', 'tasks'): 'gcloud',
90+
_format_url('googleapis/google-cloud-python', 'test_utils'): 'gcloud',
91+
_format_url('googleapis/google-cloud-python', 'texttospeech'): 'gcloud',
92+
_format_url('googleapis/google-cloud-python', 'websecurityscanner'):
93+
'gcloud',
94+
_format_url('googleapis/google-cloud-python', 'api_core'):
95+
'google-api-core',
96+
_format_url('googleapis/google-cloud-python', 'bigquery'):
97+
'google-cloud-bigquery',
98+
_format_url('googleapis/google-cloud-python', 'bigquery_datatransfer'):
99+
'google-cloud-bigquery-datatransfer',
100+
_format_url('googleapis/google-cloud-python', 'bigtable'):
101+
'google-cloud-bigtable',
102+
_format_url('googleapis/google-cloud-python', 'container'):
103+
'google-cloud-container',
104+
_format_url('googleapis/google-cloud-python', 'core'):
105+
'google-cloud-core',
106+
_format_url('googleapis/google-cloud-python', 'datastore'):
107+
'google-cloud-datastore',
108+
_format_url('googleapis/google-cloud-python', 'dns'): 'google-cloud-dns',
109+
_format_url('googleapis/google-cloud-python', 'error_reporting'):
110+
'google-cloud-error-reporting',
111+
_format_url('googleapis/google-cloud-python', 'firestore'):
112+
'google-cloud-firestore',
113+
_format_url('googleapis/google-cloud-python', 'language'):
114+
'google-cloud-language',
115+
_format_url('googleapis/google-cloud-python', 'logging'):
116+
'google-cloud-logging',
117+
_format_url('googleapis/google-cloud-python', 'monitoring'):
118+
'google-cloud-monitoring',
119+
_format_url('googleapis/google-cloud-python', 'pubsub'):
120+
'google-cloud-pubsub',
121+
_format_url('googleapis/google-cloud-python', 'resource_manager'):
122+
'google-cloud-resource-manager',
123+
_format_url('googleapis/google-cloud-python', 'runtimeconfig'):
124+
'google-cloud-runtimeconfig',
125+
_format_url('googleapis/google-cloud-python', 'spanner'):
126+
'google-cloud-spanner',
127+
_format_url('googleapis/google-cloud-python', 'speech'):
128+
'google-cloud-speech',
129+
_format_url('googleapis/google-cloud-python', 'storage'):
130+
'google-cloud-storage',
131+
_format_url('googleapis/google-cloud-python', 'trace'):
132+
'google-cloud-trace',
133+
_format_url('googleapis/google-cloud-python', 'translate'):
134+
'google-cloud-translate',
135+
_format_url('googleapis/google-cloud-python', 'videointelligence'):
136+
'google-cloud-videointelligence',
137+
_format_url('googleapis/google-cloud-python', 'vision'):
138+
'google-cloud-vision',
139+
_format_url('googleapis/google-api-python-client'):
140+
'google-api-python-client',
141+
_format_url('googleapis/google-auth-library-python'): 'google-auth',
142+
_format_url('GoogleCloudPlatform/google-resumable-media-python'):
143+
'google-resumable-media',
144+
_format_url('apache/beam', 'sdks/python'): 'apache-beam[gcp]',
145+
_format_url('google/apitools'): 'google-apitools',
146+
_format_url('GoogleCloudPlatform/gsutil'): 'gsutil',
147+
_format_url('census-instrumentation/opencensus-python'): 'opencensus',
148+
_format_url('protocolbuffers/protobuf', 'python'): 'protobuf',
149+
_format_url('google/protorpc'): 'protorpc',
150+
_format_url('tensorflow/tensorflow', 'tensorflow/tools/pip_package'):
151+
'tensorflow',
152+
_format_url('tensorflow/tensorflow',
153+
'tensorflow/contrib/tpu/profiler/pip_package'): 'tensorflow',
154+
# TODO: The following projects do not use setup.py
155+
# googleapis-common-protos
156+
# grpc-google-iam-v1
157+
# grpcio
158+
# tensorboard - not sure what the build process is
159+
# _format_url('tensorflow/tensorboard', 'tensorboard/pip_package'):
160+
# 'tensorboard',
161+
}
162+
163+
# TODO: Find top 30 packages by download count in BigQuery table.
164+
THIRD_PARTY_PACKAGE_LIST = [
165+
'requests',
166+
'flask',
167+
'django',
168+
]
169+
170+
PKG_PY_VERSION_NOT_SUPPORTED = {
171+
2: ['tensorflow', ],
172+
3: ['apache-beam[gcp]', 'gsutil', ],
173+
}

0 commit comments

Comments
 (0)