Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: 3.12
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.12
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python: ['3.8', '3.9', '3.10']
python: ['3.11', '3.12', '3.13', '3.14']
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
Expand All @@ -20,7 +20,7 @@ jobs:
- name: run tests
run: python3 -m pytest --cov=tlviz
- name: upload coverage to Codecov
if: ${{matrix.os == 'ubuntu-latest' && matrix.python == '3.10'}}
if: ${{matrix.os == 'ubuntu-latest' && matrix.python == '3.13'}}
uses: codecov/codecov-action@v2
with:
verbose: true
14 changes: 7 additions & 7 deletions tlviz/_xarray_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ def add_factor_metadata(cp_tensor, dataset):
>>> bikes = load_oslo_city_bike()
>>> bikes.coords
Coordinates:
* End station name (End station name) object '7 Juni Plassen' ... 'Økernve...
lat (End station name) float64 59.92 59.93 ... 59.93 59.92
lon (End station name) float64 10.73 10.75 ... 10.8 10.78
* Hour (Hour) int32 0 1 2 3 4 5 6 7 8 ... 16 17 18 19 20 21 22 23
* Month (Month) int32 1 2 3 4 5 6 7 8 9 10 11 12
* Day of week (Day of week) int32 0 1 2 3 4 5 6
* Year (Year) int32 2020 2021
* End station name (End station name) object 2kB '7 Juni Plassen' ... 'Øke...
* Year (Year) int32 8B 2020 2021
* Month (Month) int32 48B 1 2 3 4 5 6 7 8 9 10 11 12
* Day of week (Day of week) int32 28B 0 1 2 3 4 5 6
* Hour (Hour) int32 96B 0 1 2 3 4 5 6 7 ... 17 18 19 20 21 22 23
lat (End station name) float64 2kB 59.92 59.93 ... 59.93 59.92
lon (End station name) float64 2kB 10.73 10.75 ... 10.8 10.78

We see that the ``End station name`` dimension has two additional columns: ``lat`` and ``lon``.
These contain metadata about the end station coordinates, and it can be useful to have these
Expand Down
8 changes: 4 additions & 4 deletions tlviz/model_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def sse(cp_tensor, dataset):
>>> cp = random_cp((4, 5, 6), 3, random_state=rng)
>>> X = rng.random_sample((4, 5, 6))
>>> sse(cp, X)
18.948918157419186
np.float64(18.948918157419186)
"""
X_hat = cp_to_tensor(cp_tensor)
return np.sum((dataset - X_hat) ** 2)
Expand Down Expand Up @@ -236,7 +236,7 @@ def relative_sse(cp_tensor, dataset, sum_squared_dataset=None):
>>> cp = random_cp((4, 5, 6), 3, random_state=rng)
>>> X = rng.random_sample((4, 5, 6))
>>> relative_sse(cp, X)
0.4817407254961442
np.float64(0.4817407254961442)
"""
if sum_squared_dataset is None:
sum_squared_x = np.sum(dataset**2)
Expand Down Expand Up @@ -279,13 +279,13 @@ def fit(cp_tensor, dataset, sum_squared_dataset=None):
>>> cp = random_cp((4, 5, 6), 3, random_state=rng)
>>> X = rng.random_sample((4, 5, 6))
>>> fit(cp, X)
0.5182592745038558
np.float64(0.5182592745038558)

We can see that it is equal to 1 - relative SSE

>>> from tlviz.model_evaluation import relative_sse
>>> 1 - relative_sse(cp, X)
0.5182592745038558
np.float64(0.5182592745038558)
"""
return 1 - relative_sse(cp_tensor, dataset, sum_squared_dataset=sum_squared_dataset)

Expand Down
4 changes: 2 additions & 2 deletions tlviz/multimodel_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,12 @@ def get_model_with_lowest_error(cp_tensors, dataset, error_function=None, return
And that it is the model that has the lowest error

>>> errors[index] == min(errors)
True
np.True_

And finally that this error is equal to the relative SSE

>>> errors[index] == relative_sse(model, dataset)
True
np.True_
"""
if error_function is None:
error_function = model_evaluation.relative_sse
Expand Down
39 changes: 27 additions & 12 deletions tlviz/visualisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@
]


def _get_next_style(ax):
"""Get the next style of the matplotlib axes property cycler and increment its position.

Before Matplotlib v3.8, we directly accessed the prop_cycler of ax._get_lines. However this attribute
was deleted to make pickling work correctly for Matplotlib axes, so now we need this workaround.
"""
lines = ax._get_lines
if hasattr(lines, "prop_cycler"):
return next(lines.prop_cycler)
else:
out = lines._cycler_items[lines._idx]
lines._idx = (lines._idx + 1) % len(lines._cycler_items)
return out


def scree_plot(cp_tensors, dataset, errors=None, metric="Fit", ax=None):
"""Create scree plot for the given cp tensors.

Expand Down Expand Up @@ -177,7 +192,7 @@ def histogram_of_residuals(cp_tensor, dataset, ax=None, standardised=True, **kwa
>>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=0)
>>> est_cp = parafac(X, 3)
>>> histogram_of_residuals(est_cp, X)
<AxesSubplot: title={'center': 'Histogram of residuals'}, xlabel='Standardised residuals', ylabel='Frequency'>
<Axes: title={'center': 'Histogram of residuals'}, xlabel='Standardised residuals', ylabel='Frequency'>
>>> plt.show()
"""
estimated_dataset = cp_to_tensor(cp_tensor)
Expand Down Expand Up @@ -244,7 +259,7 @@ def residual_qq(cp_tensor, dataset, ax=None, use_pingouin=False, **kwargs):
>>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=0)
>>> est_cp = parafac(X, 3)
>>> residual_qq(est_cp, X)
<AxesSubplot: title={'center': 'QQ-plot of residuals'}, xlabel='Theoretical Quantiles', ylabel='Sample Quantiles'>
<Axes: title={'center': 'QQ-plot of residuals'}, xlabel='Theoretical Quantiles', ylabel='Sample Quantiles'>
>>> plt.show()
"""
estimated_dataset = cp_to_tensor(cp_tensor)
Expand Down Expand Up @@ -337,7 +352,7 @@ def outlier_plot(
>>> outlier_plot(
... cp, data, leverage_rules_of_thumb='p-value', residual_rules_of_thumb='p-value', p_value=[0.05, 0.01]
... )
<AxesSubplot: title={'center': 'Outlier plot for End station name'}, xlabel='Leverage score', ylabel='Slabwise SSE'>
<Axes: title={'center': 'Outlier plot for End station name'}, xlabel='Leverage score', ylabel='Slabwise SSE'>
>>> plt.show()

We can also provide multiple types of rules of thumb
Expand All @@ -360,7 +375,7 @@ def outlier_plot(
>>> outlier_plot(
... cp, data, leverage_rules_of_thumb=['huber lower', 'hw higher'], residual_rules_of_thumb='two sigma'
... )
<AxesSubplot: title={'center': 'Outlier plot for End station name'}, xlabel='Leverage score', ylabel='Slabwise SSE'>
<Axes: title={'center': 'Outlier plot for End station name'}, xlabel='Leverage score', ylabel='Slabwise SSE'>
>>> plt.show()

See Also
Expand Down Expand Up @@ -438,7 +453,7 @@ def outlier_plot(

# Draw the lines
for key, value in leverage_thresholds.items():
ax.axvline(value, label=key, **next(ax._get_lines.prop_cycler))
ax.axvline(value, label=key, **_get_next_style(ax))

residual_thresholds = {}
if residual_rules_of_thumb is not None:
Expand All @@ -465,7 +480,7 @@ def outlier_plot(
name = residual_rule_of_thumb
residual_thresholds[name] = threshold
for key, value in residual_thresholds.items():
ax.axhline(value, label=key, **next(ax._get_lines.prop_cycler))
ax.axhline(value, label=key, **_get_next_style(ax))

if len(leverage_thresholds) > 0 or len(residual_thresholds) > 0:
ax.legend()
Expand Down Expand Up @@ -521,7 +536,7 @@ def component_scatterplot(cp_tensor, mode, x_component=0, y_component=1, ax=None
>>> import matplotlib.pyplot as plt
>>> cp_tensor = random_cp(shape=(5,10,15), rank=2)
>>> component_scatterplot(cp_tensor, mode=0)
<AxesSubplot: title={'center': 'Component plot'}, xlabel='Component 0', ylabel='Component 1'>
<Axes: title={'center': 'Component plot'}, xlabel='Component 0', ylabel='Component 1'>
>>> plt.show()

Example with PCA of a real stock dataset
Expand Down Expand Up @@ -553,7 +568,7 @@ def component_scatterplot(cp_tensor, mode, x_component=0, y_component=1, ax=None
>>>
>>> # Visualise the components with components_plot
>>> component_scatterplot(cp_tensor, mode=1)
<AxesSubplot: title={'center': 'Component plot'}, xlabel='Component 0', ylabel='Component 1'>
<Axes: title={'center': 'Component plot'}, xlabel='Component 0', ylabel='Component 1'>
>>> plt.show()
"""
if ax is None:
Expand Down Expand Up @@ -624,7 +639,7 @@ def core_element_plot(cp_tensor, dataset, normalised=False, ax=None):
>>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=42)
>>> est_cp = parafac(X, 3)
>>> core_element_plot(est_cp, X)
<AxesSubplot: title={'center': 'Core consistency: 99.8'}, xlabel='Core element', ylabel='Value'>
<Axes: title={'center': 'Core consistency: 99.8'}, xlabel='Core element', ylabel='Value'>
>>> plt.show()
"""
weights, factors = cp_tensor
Expand Down Expand Up @@ -1300,7 +1315,7 @@ def percentage_variation_plot(
>>> import matplotlib.pyplot as plt
>>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0)
>>> percentage_variation_plot(cp_tensor)
<AxesSubplot: xlabel='Component number', ylabel='Percentage variation explained [%]'>
<Axes: xlabel='Component number', ylabel='Percentage variation explained [%]'>
>>> plt.show()

We can also get the percentage of variation in the data that each component explains
Expand All @@ -1314,7 +1329,7 @@ def percentage_variation_plot(
>>> import matplotlib.pyplot as plt
>>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0)
>>> percentage_variation_plot(cp_tensor, dataset, method="data")
<AxesSubplot: xlabel='Component number', ylabel='Percentage variation explained [%]'>
<Axes: xlabel='Component number', ylabel='Percentage variation explained [%]'>
>>> plt.show()

Or both the variation in the data and in the model
Expand All @@ -1328,7 +1343,7 @@ def percentage_variation_plot(
>>> import matplotlib.pyplot as plt
>>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0)
>>> percentage_variation_plot(cp_tensor, dataset, method="both")
<AxesSubplot: xlabel='Component number', ylabel='Percentage variation explained [%]'>
<Axes: xlabel='Component number', ylabel='Percentage variation explained [%]'>
>>> plt.show()
"""
if ax is None:
Expand Down