Skip to content
This repository was archived by the owner on Oct 21, 2022. It is now read-only.

Commit cfb902c

Browse files
authored
Generate package pairs to check (#240)
1 parent 036c673 commit cfb902c

File tree

6 files changed

+217
-103
lines changed

6 files changed

+217
-103
lines changed

compatibility_lib/compatibility_lib/compatibility_checker.py

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -51,45 +51,85 @@ def check(self, packages, python_version):
5151
result = requests.get(SERVER_URL, params=data)
5252
content = result.content.decode('utf-8')
5353

54-
return json.loads(content)
54+
return json.loads(content), python_version
5555

5656
def filter_packages(self, packages, python_version):
    """Return the entries of ``packages`` usable with ``python_version``.

    Args:
        packages: Iterable of package identifiers. An identifier containing
            'github.com' is treated as a repository URL and translated to
            its package name through ``configs.WHITELIST_URLS`` before the
            support lookup.
        python_version: Python major version; converted with ``int()`` to
            index ``configs.PKG_PY_VERSION_NOT_SUPPORTED``.

    Returns:
        A new list holding the original identifiers (URLs stay URLs), in
        input order, whose package name is not listed as unsupported for
        the given version.
    """
    def _is_supported(pkg):
        # The support table is keyed by package name, so GitHub URLs are
        # mapped to their name first.
        name = configs.WHITELIST_URLS[pkg] if 'github.com' in pkg else pkg
        unsupported = configs.PKG_PY_VERSION_NOT_SUPPORTED[
            int(python_version)]
        return name not in unsupported

    return [pkg for pkg in packages if _is_supported(pkg)]
68+
69+
@retrying.retry(wait_random_min=1000,
                wait_random_max=2000)
def retrying_check(self, args):
    """Run ``check`` for one work item, retried by the ``retrying`` decorator.

    The decorator is configured only with random wait bounds
    (``wait_random_min=1000``, ``wait_random_max=2000``); no stop
    condition is passed here.

    Args:
        args: Indexable work item whose element 0 is the list of packages
            and whose element 1 is the Python version. A single argument is
            used so the method can be handed directly to ``Executor.map``.

    Returns:
        Whatever ``self.check(packages, python_version)`` returns.
    """
    return self.check(args[0], args[1])
6776

68-
def get_self_compatibility(self, python_version, packages=None):
69-
"""Get the self compatibility data for each package."""
77+
def collect_check_packages(
        self, python_version=None, packages=None, pkg_sets=None):
    """Build every (packages, python_version) work item to be checked.

    Args:
        python_version: Python version to generate work items for, e.g.
            '2' or '3'. When None, items are generated for both '2' and
            '3'.
        packages: Iterable of packages to check on their own. Defaults to
            ``configs.PKG_LIST``.
        pkg_sets: Iterable of package groups (e.g. pairs) to check
            together. Defaults to every 2-combination of
            ``configs.PKG_LIST``.

    Returns:
        A tuple of ``(package_list, python_version)`` tuples: all single
        package items first (grouped by version), followed by all package
        set items (grouped by version). Packages, and sets containing a
        package, that ``filter_packages`` rejects for a version are left
        out for that version only.
    """
    if packages is None:
        packages = configs.PKG_LIST
    if pkg_sets is None:
        pkg_sets = itertools.combinations(configs.PKG_LIST, 2)

    # Both inputs are walked once per Python version, so materialize them;
    # a one-shot iterator would otherwise be empty on the second pass.
    packages = list(packages)
    pkg_sets = [list(pkg_set) for pkg_set in pkg_sets]

    if python_version is None:
        py_versions = ['2', '3']
    else:
        py_versions = [python_version]

    # Single packages. Always filter from the full package list so that a
    # package dropped for one version is still considered for the next.
    check_singles = []
    for py_ver in py_versions:
        for pkg in self.filter_packages(packages, py_ver):
            check_singles.append(([pkg], py_ver))

    # Package sets. A set is kept for a version only when filtering removes
    # none of its members.
    check_pairs = []
    for py_ver in py_versions:
        for pkg_set in pkg_sets:
            if self.filter_packages(pkg_set, py_ver) == pkg_set:
                # Copy so work items never share a mutable list.
                check_pairs.append((list(pkg_set), py_ver))

    return tuple(check_singles) + tuple(check_pairs)
121+
122+
def get_compatibility(
        self, python_version=None, packages=None, pkg_sets=None):
    """Yield the check result of every single package and package set.

    The work items come from ``collect_check_packages`` and are dispatched
    to ``retrying_check`` on a thread pool sized by ``self.max_workers``.

    Args:
        python_version: Forwarded to ``collect_check_packages``.
        packages: Forwarded to ``collect_check_packages``.
        pkg_sets: Forwarded to ``collect_check_packages``.

    Yields:
        One-element tuples, each wrapping the value returned by
        ``retrying_check`` for a work item, in work-item order.
    """
    work_items = self.collect_check_packages(
        python_version, packages, pkg_sets)

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.max_workers) as executor:
        outcomes = executor.map(self.retrying_check, tuple(work_items))

        # Consumers index each yielded item with [0], so every outcome is
        # wrapped in a 1-tuple.
        for outcome in outcomes:
            yield (outcome,)

compatibility_lib/compatibility_lib/compatibility_store.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from google.cloud import bigquery
2525
from google.cloud.bigquery import table
2626

27+
from compatibility_lib import configs
2728
from compatibility_lib import package
2829

2930
_DATASET_NAME = 'compatibility_checker'
@@ -438,8 +439,8 @@ def save_compatibility_statuses(
438439
install_name_to_compatibility_result[install_name])
439440
new_version_string = self._get_package_version(cs)
440441

441-
old_version = version.StrictVersion(old_version_string)
442-
new_version = version.StrictVersion(new_version_string)
442+
old_version = version.LooseVersion(old_version_string)
443+
new_version = version.LooseVersion(new_version_string)
443444
if new_version > old_version:
444445
install_name_to_compatibility_result[install_name] = cs
445446

@@ -478,6 +479,8 @@ def _get_package_version(self, result: CompatibilityResult) -> str:
478479
raise ValueError('multiple packages found in CompatibilityResult')
479480

480481
install_name = result.packages[0].install_name
482+
if 'github.com' in install_name:
483+
install_name = configs.WHITELIST_URLS[install_name]
481484
install_name_sanitized = install_name.split('[')[0]
482485

483486
for pkg, version_info in result.dependency_info.items():

compatibility_lib/compatibility_lib/configs.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,16 @@ def _format_url(repo_name, setuppy_path=''):
171171
_format_url('google/apitools'): 'google-apitools',
172172
_format_url('GoogleCloudPlatform/gsutil'): 'gsutil',
173173
_format_url('census-instrumentation/opencensus-python'): 'opencensus',
174-
_format_url('protocolbuffers/protobuf', 'python'): 'protobuf',
175174
_format_url('google/protorpc'): 'protorpc',
176175
_format_url('tensorflow/tensorflow', 'tensorflow/tools/pip_package'):
177176
'tensorflow',
178-
_format_url('tensorflow/tensorflow',
179-
'tensorflow/contrib/tpu/profiler/pip_package'): 'tensorflow',
180177
_format_url('GoogleCloudPlatform/cloud-opensource-python',
181178
'compatibility_lib'): 'compatibility-lib',
182179
# TODO: The following projects do not use setup.py
183180
# googleapis-common-protos
184181
# grpc-google-iam-v1
185182
# grpcio
183+
# protobuf
186184
# tensorboard - not sure what the build process is
187185
# _format_url('tensorflow/tensorboard', 'tensorboard/pip_package'):
188186
# 'tensorboard',

compatibility_lib/compatibility_lib/get_compatibility_data.py

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,34 @@
1414

1515
"""Get self and pairwise compatibility data and write to bigquery."""
1616

17+
import argparse
1718
import datetime
19+
import itertools
1820

1921
from compatibility_lib import compatibility_checker
2022
from compatibility_lib import compatibility_store
2123
from compatibility_lib import configs
2224
from compatibility_lib import package
2325

24-
checker = compatibility_checker.CompatibilityChecker()
26+
checker = compatibility_checker.CompatibilityChecker(max_workers=800)
2527
store = compatibility_store.CompatibilityStore()
2628

2729
PY2 = '2'
2830
PY3 = '3'
2931

3032

31-
def _result_dict_to_compatibility_result(results, python_version):
33+
def _result_dict_to_compatibility_result(results):
3234
res_list = []
3335

3436
for item in results:
3537
res_dict = item[0]
36-
check_result = res_dict.get('result')
38+
result_content, python_version = res_dict
39+
check_result = result_content.get('result')
3740
packages_list = [package.Package(pkg)
38-
for pkg in res_dict.get('packages')]
39-
details = res_dict.get('description')
41+
for pkg in result_content.get('packages')]
42+
details = result_content.get('description')
4043
timestamp = datetime.datetime.now().isoformat()
41-
dependency_info = res_dict.get('dependency_info')
44+
dependency_info = result_content.get('dependency_info')
4245

4346
compatibility_result = compatibility_store.CompatibilityResult(
4447
packages=packages_list,
@@ -53,27 +56,56 @@ def _result_dict_to_compatibility_result(results, python_version):
5356
return res_list
5457

5558

56-
def write_to_status_table():
59+
def get_package_pairs(check_pypi=False, check_github=False):
    """Collect the single packages and package pairs selected for checking.

    Args:
        check_pypi: When true, include every package in
            ``configs.PKG_LIST`` and every 2-combination of that list.
        check_github: When true, include every URL key of
            ``configs.WHITELIST_URLS`` and pair each URL with every
            ``configs.PKG_LIST`` entry except the one whose name equals the
            URL's mapped package name.

    Returns:
        A ``(self_packages, pair_packages)`` tuple of lists; both are empty
        when neither flag is set.
    """
    singles = []
    pairs = []

    if check_pypi:
        # PyPI releases: each package alone, plus all unordered pairs.
        singles.extend(configs.PKG_LIST)
        pairs.extend(itertools.combinations(configs.PKG_LIST, 2))

    if check_github:
        # GitHub heads: each URL alone, plus the URL against every PyPI
        # package other than its own release.
        singles.extend(configs.WHITELIST_URLS.keys())
        pairs.extend(
            (gh_url, pypi_pkg)
            for gh_url, gh_name in configs.WHITELIST_URLS.items()
            for pypi_pkg in configs.PKG_LIST
            if pypi_pkg != gh_name)

    return singles, pairs
82+
83+
84+
def write_to_status_table(check_pypi=False, check_github=False):
    """Run the selected compatibility checks and save their statuses.

    Args:
        check_pypi: Passed to ``get_package_pairs`` to select the PyPI
            released packages.
        check_github: Passed to ``get_package_pairs`` to select the GitHub
            head packages.
    """
    singles, pairs = get_package_pairs(check_pypi, check_github)

    # A single call covers both the self checks and the pairwise checks.
    raw_results = checker.get_compatibility(
        packages=singles, pkg_sets=pairs)

    store.save_compatibility_statuses(
        _result_dict_to_compatibility_result(raw_results))
7693

7794

7895
def parse_bool_flag(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    any non-empty value, including 'False', becomes True. This converter
    interprets the text instead.

    Args:
        value: The raw option value (or an actual bool).

    Returns:
        True for 'true', 't', 'yes', 'y', '1'; False for 'false', 'f',
        'no', 'n', '0' and the empty string (case-insensitive, surrounding
        whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognized
            boolean spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Determine what to check.')
    # nargs='?' with const=True lets the flag be given bare (--pypi) as
    # well as with an explicit value (--pypi True / --pypi False).
    parser.add_argument(
        '--pypi',
        type=parse_bool_flag,
        nargs='?',
        const=True,
        default=False,
        help='Check PyPI released packages or not.')
    parser.add_argument(
        '--github',
        type=parse_bool_flag,
        nargs='?',
        const=True,
        default=False,
        help='Check GitHub head packages or not.')
    args = parser.parse_args()

    check_pypi = args.pypi
    check_github = args.github
    write_to_status_table(check_pypi, check_github)

compatibility_lib/compatibility_lib/test_compatibility_checker.py

Lines changed: 17 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -54,50 +54,15 @@ def _mock_retrying_check(self, *args):
5454
python_version = args[0][1]
5555
return (packages, python_version, 'SUCCESS')
5656

57-
def test_get_self_compatibility(self):
57+
def test_get_compatibility(self):
5858
checker = compatibility_checker.CompatibilityChecker()
5959

60-
pkg_list = ['pkg1', 'pkg2']
61-
pkg_py_version_not_supported = {
62-
2: ['tensorflow', ],
63-
3: ['apache-beam[gcp]', 'gsutil', ],
64-
}
65-
python_version = 3
66-
67-
mock_config = mock.Mock()
68-
mock_config.PKG_LIST = pkg_list
69-
mock_config.PKG_PY_VERSION_NOT_SUPPORTED = pkg_py_version_not_supported
70-
patch_config = mock.patch(
71-
'compatibility_lib.compatibility_checker.configs', mock_config)
72-
73-
patch_executor = mock.patch(
74-
'compatibility_lib.compatibility_checker.concurrent.futures.ThreadPoolExecutor',
75-
FakeExecutor)
76-
patch_retrying_check = mock.patch.object(
77-
compatibility_checker.CompatibilityChecker,
78-
'retrying_check',
79-
self._mock_retrying_check)
80-
81-
res = []
82-
with patch_config, patch_executor, patch_retrying_check:
83-
result = checker.get_self_compatibility(python_version)
84-
85-
for item in result:
86-
res.append(item)
87-
88-
self.assertEqual(res,
89-
[((['pkg1'], 3, 'SUCCESS'),),
90-
((['pkg2'], 3, 'SUCCESS'),)])
91-
92-
def test_get_pairwise_compatibility(self):
9360
pkg_list = ['pkg1', 'pkg2', 'pkg3']
9461
pkg_py_version_not_supported = {
9562
2: ['tensorflow', ],
9663
3: ['apache-beam[gcp]', 'gsutil', ],
9764
}
9865

99-
python_version = 3
100-
10166
mock_config = mock.Mock()
10267
mock_config.PKG_LIST = pkg_list
10368
mock_config.PKG_PY_VERSION_NOT_SUPPORTED = pkg_py_version_not_supported
@@ -114,16 +79,26 @@ def test_get_pairwise_compatibility(self):
11479

11580
res = []
11681
with patch_config, patch_executor, patch_retrying_check:
117-
checker = compatibility_checker.CompatibilityChecker()
118-
result = checker.get_pairwise_compatibility(python_version)
82+
result = checker.get_compatibility()
11983

12084
for item in result:
12185
res.append(item)
12286

123-
self.assertEqual(res,
124-
[((['pkg1', 'pkg2'], 3, 'SUCCESS'),),
125-
((['pkg1', 'pkg3'], 3, 'SUCCESS'),),
126-
((['pkg2', 'pkg3'], 3, 'SUCCESS'),)])
87+
expected = sorted([
88+
((['pkg1'], '2', 'SUCCESS'),),
89+
((['pkg2'], '2', 'SUCCESS'),),
90+
((['pkg3'], '2', 'SUCCESS'),),
91+
((['pkg1'], '3', 'SUCCESS'),),
92+
((['pkg2'], '3', 'SUCCESS'),),
93+
((['pkg3'], '3', 'SUCCESS'),),
94+
((['pkg1', 'pkg2'], '2', 'SUCCESS'),),
95+
((['pkg1', 'pkg3'], '2', 'SUCCESS'),),
96+
((['pkg2', 'pkg3'], '2', 'SUCCESS'),),
97+
((['pkg1', 'pkg2'], '3', 'SUCCESS'),),
98+
((['pkg1', 'pkg3'], '3', 'SUCCESS'),),
99+
((['pkg2', 'pkg3'], '3', 'SUCCESS'),)])
100+
101+
self.assertEqual(sorted(res), expected)
127102

128103

129104
class FakeExecutor(object):

0 commit comments

Comments
 (0)