Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,207 changes: 1,207 additions & 0 deletions notebooks/basic_network.ipynb

Large diffs are not rendered by default.

430 changes: 430 additions & 0 deletions notebooks/basic_random_forest.ipynb

Large diffs are not rendered by default.

388 changes: 388 additions & 0 deletions notebooks/basic_sklearn.ipynb

Large diffs are not rendered by default.

590 changes: 590 additions & 0 deletions notebooks/basic_torch.ipynb

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions src/MaCh3PythonUtils/file_handling/chain_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
import matplotlib.pyplot as plt

class ChainDiagnostics:
def __init__(self, config_reader: ChainHandler) -> None:
self._chain_handler = config_reader

def _extract_chain_information(self, parameter_name: str | int):
if isinstance(parameter_name, str):
parameter_id = self._chain_handler.ttree_array.columns.get_loc(parameter_name)
if isinstance(parameter_name, int):
parameter_id = parameter_name
parameter_name = self._chain_handler.ttree_array.columns[parameter_id]

return self._chain_handler.ttree_array.iloc[:,parameter_id], parameter_name

def __make_plot(self, fig, axs):
if fig is None:
fig, axs = plt.subplots(1, 1, figsize=(10, 5))
elif axs is None:
axs = fig.add_subplot(1, 1, 1)

return fig, axs

def make_trace_plot(self, parameter_name: str | int, axs=None, fig =None):
fig, axs = self.__make_plot(fig, axs)

chain, parameter_name = self._extract_chain_information(parameter_name)
axs.plot(chain, linewidth=0.5, color='darkorange')

return fig, axs

def make_autocorr_plot(self, parameter_name: str | int, axs=None, fig =None):
fig, axs = self.__make_plot(fig, axs)

if fig is None:
fig, axs = plt.subplots(1, 1, figsize=(10, 5))
elif axs is None:
axs = fig.add_subplot(1, 1, 1)

chain, parameter_name = self._extract_chain_information(parameter_name)
axs.acorr(chain, maxlags=1000, linewidth=0.5, color='darkorange')

return fig, axs

def make_posterior_hist_plot(self, parameter_name: str | int, axs=None, fig =None, is_horizontal=False):
fig, axs = self.__make_plot(fig, axs)

if fig is None:
fig, axs = plt.subplots(1, 1, figsize=(10, 5))
elif axs is None:
axs = fig.add_subplot(1, 1, 1)

orientation = 'vertical'

if is_horizontal:
orientation = 'horizontal'

chain, parameter_name = self._extract_chain_information(parameter_name)
axs.hist(chain, bins=50, density=True, linewidth=0.5, color='darkorange', alpha=0.5, orientation=orientation)

return fig, axs

def __call__(self, parameter_name: str):
fig, axs = plt.subplots(2, 2, figsize=(15, 5))
axs[1][1].remove()
axs[1][0].remove()

fig, axs[0][0] = self.make_trace_plot(parameter_name, axs=axs[0][0], fig=fig)
fig, axs[0][1] = self.make_posterior_hist_plot(parameter_name, axs=axs[0][1], fig=fig, is_horizontal=True)

# To share the same axis etc,
plt.setp(axs[0][1].get_yticklabels(), visible=False)
fig.subplots_adjust(wspace=.0)


# fig, axs[1][0] = self.make_autocorr_plot(parameter_name, axs=axs[1][0], fig=fig)
return fig, axs
63 changes: 47 additions & 16 deletions src/MaCh3PythonUtils/machine_learning/file_ml_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from rich import print

class FileMLInterface(ABC):
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
(0, '#ffffff'),
Expand Down Expand Up @@ -59,7 +61,7 @@ def __init__(self, chain: ChainHandler, prediction_variable: str, fit_name: str)
self._scaler = StandardScaler()
# self._pca_matrix = PCA(n_components=0.95)

self._label_scaler = MinMaxScaler(feature_range=(0, 1))
self._label_scaler = StandardScaler()



Expand All @@ -86,19 +88,19 @@ def set_training_test_set(self, test_size: float):
self._training_data, self._test_data, self._training_labels, self._test_labels = train_test_split(features, labels, test_size=test_size)

# Fit scaling pre-processors. These get applied properly when scale_data is called
_= self._scaler.fit_transform(self._training_data)
self._label_scaler.fit_transform(self._training_labels)
self._scaler.fit(self._training_data)
self._label_scaler.fit(self._training_labels)

# self._pca_matrix.fit(scaled_training)

def scale_data(self, input_data):
    """Run ``input_data`` through the feature scaler.

    :param input_data: Data handed to ``self._scaler.transform``
    :return: The result of the scaler's ``transform``
    """
    # NOTE: a PCA projection after scaling is currently disabled.
    return self._scaler.transform(input_data)

def scale_labels(self, labels):
    """Run ``labels`` through the label scaler.

    :param labels: Labels handed to ``self._label_scaler.transform``
    :return: The result of the label scaler's ``transform``
    """
    scaled_labels = self._label_scaler.transform(labels)
    return scaled_labels

def invert_scaling(self, input_data):
# Inverts transform
Expand Down Expand Up @@ -193,7 +195,7 @@ def load_model(self, input_model: str):
:param input_file: Pickled Model
:type input_file: str
"""
print(f"Attempting to load file from {input_file}")
print(f"[spring_green1]Attempting to load file from[/spring_green1][bold red3] {input_file}")
with open(input_model, 'r') as f:
self._model = pickle.load(f)

Expand All @@ -216,14 +218,18 @@ def test_model(self):
train_as_numpy = self.scale_labels(self._training_labels).T[0]
self.evaluate_model(train_prediction, train_as_numpy, "train_qq_plot.pdf")

print("=====\n\n")
print("=====")
print("Testing Results!")

test_prediction = self.model_predict(self._test_data)
test_as_numpy = self.scale_labels(self._test_labels).T[0]

self.evaluate_model(test_prediction, test_as_numpy, outfile=f"{self._fit_name}")
print("=====\n\n")
print("=====")


def print_model_summary(self):
    """Print the model summary heading."""
    heading = "Model Summary"
    print(heading)

def model_predict_single_sample(self, sample):
sample_shaped = sample.reshape(1,-1)
Expand All @@ -232,7 +238,7 @@ def model_predict_single_sample(self, sample):
def get_maxlikelihood(self)->OptimizeResult:
init_vals = self.training_data.iloc[[1]].to_numpy()[0]

print("Calculating max LLH")
print("[bold purple]Calculating max LLH")
maximal_likelihood = minimize(self.model_predict_single_sample, init_vals, bounds=zip(self._chain.lower_bounds[:-1], self._chain.upper_bounds[:-1]), method="L-BFGS-B", options={"disp": True})
return maximal_likelihood

Expand All @@ -245,9 +251,9 @@ def run_likelihood_scan(self, n_divisions: int = 500):

errors = np.sqrt(np.diag(maximal_likelihood.hess_inv(np.identity(self.chain.ndim-1))))

print("Maximal Pars :")
print("[bold red3]Maximal Pars :")
for i in range(self.chain.ndim-1):
print(f"Param : {self.chain.plot_branches[i]} : {maximal_likelihood.x[i]}±{errors[i]}")
print(f"[bold red3]Param :[/bold red3] [yellow3]{self.chain.plot_branches[i]} : {maximal_likelihood.x[i]}±{errors[i]}")


with PdfPages("llh_scan.pdf") as pdf:
Expand Down Expand Up @@ -285,13 +291,14 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
:type outfile: str, optional
"""

print(predicted_values)
print(f"Mean Absolute Error : {metrics.mean_absolute_error(predicted_values,true_values)}")

print(f"[bold red3]Mean Absolute Error :[/bold red3] [yellow3]{metrics.mean_absolute_error(predicted_values,true_values)}")

outfile_name = outfile.split(".")[0]
outfile = f"{outfile_name}.pdf"
warnings.filterwarnings("ignore", message="Polyfit may be poorly conditioned")
lobf = np.poly1d(np.polyfit(predicted_values, true_values, 1))

print(f"Line of best fit : y={lobf.c[0]}x + {lobf.c[1]}")
print(f"[bold purple]Line of best fit :[/bold purple] [dodger_blue1]y={lobf.c[0]}x + {lobf.c[1]}")

fig = plt.figure()

Expand Down Expand Up @@ -322,11 +329,21 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
ax.set_ylabel("True Log Likelihood")

fig.legend()

if outfile=="": outfile = f"evaluated_model_qq_tf.pdf"

print(f"Saving QQ to {outfile}")
print(f"[bold spring_green1]Saving QQ to[/bold spring_green1][dodger_blue1] {outfile}")

fig.savefig(outfile)

try:
is_notebook = self.is_notebook()
if is_notebook:
plt.show()
except Exception:
...


plt.close()

# Gonna draw a hist
Expand All @@ -335,4 +352,18 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
plt.hist(difs, bins=100, density=True, range=(np.std(difs)*-5, np.std(difs)*5))
plt.xlabel("True - Pred")
plt.savefig(f"diffs_5sigma_range_{outfile}")
plt.close()

plt.close()

@classmethod
def is_notebook(cls) -> bool:
    """Report whether the active IPython shell is a ``ZMQInteractiveShell``.

    :return: True when ``get_ipython()`` yields a ``ZMQInteractiveShell``
        (Jupyter notebook or qtconsole); False for any other shell, or when
        ``get_ipython`` is not defined at all
    :rtype: bool
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably the standard Python interpreter.
        return False

    # A terminal IPython shell and any other shell type count as "not a notebook".
    return shell_name == 'ZMQInteractiveShell'
30 changes: 26 additions & 4 deletions src/MaCh3PythonUtils/machine_learning/ml_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
from MaCh3PythonUtils.machine_learning.tensorflow.tf_manual_interface import TfManualLayeredInterface
from MaCh3PythonUtils.machine_learning.tensorflow.tf_interface import TfInterface

from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
from MaCh3PythonUtils.machine_learning.torch.torch_interface import TorchInterface

from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
import sklearn.ensemble as ske
import tensorflow.keras as tfk

Expand All @@ -32,6 +33,9 @@ class MLFactory:
"normalizing_flow": TfNormalizingFlowModel,
"autotune": TfAutotuneInterface
},
"torch": {
"sequential": TorchInterface
}
}

def __init__(self, input_chain: ChainHandler, prediction_variable: str, plot_name: str):
Expand Down Expand Up @@ -92,7 +96,7 @@ def __make_scikit_model(self, algorithm: str, **kwargs)->SciKitInterface:
def __make_tensorflow_layered_model(self, interface: TfManualLayeredInterface, layers: dict)->TfManualLayeredInterface:
for layer in layers:
layer_id = list(layer.keys())[0]
interface.add_layer(layer_id, layer[layer_id])
interface.add_layer(layer_id, layer[layer_id].copy())

return interface

Expand All @@ -106,14 +110,28 @@ def __make_tensorflow_model(self, algorithm: str, **kwargs)->TfInterface:

# Ugh
if algorithm=="sequential" or algorithm=="residual":
print("HERE")
model = self.__make_tensorflow_layered_model(model, kwargs["Layers"])
model.set_training_settings(kwargs.get("FitSettings"))


model.build_model(**kwargs["BuildSettings"])

return model

def __make_torch_model(self, algorithm: str, **kwargs)->TorchInterface:
    """Build a torch interface for ``algorithm`` and configure its layers.

    :param algorithm: Key (case-insensitive) into the "torch" table of implemented algorithms
    :type algorithm: str
    :param kwargs: Must contain "Layers", "BuildSettings" and "FitSettings"
    :raises Exception: If ``algorithm`` is not in the "torch" table
    :return: The built torch interface
    :rtype: TorchInterface
    """
    torch_algorithms = self.__IMPLEMENTED_ALGORITHMS["torch"]
    interface_cls = torch_algorithms.get(algorithm.lower())

    if interface_cls is None:
        raise Exception(f"Cannot find {algorithm}")

    model: TorchInterface = interface_cls(self._chain, self._prediction_variable, self._plot_name)

    # Each entry is a mapping whose first key names the layer and whose
    # value holds that layer's arguments; add_layer is given a copy of them.
    for layer_spec in kwargs["Layers"]:
        layer_id = list(layer_spec.keys())[0]
        layer_args = layer_spec[layer_id].copy()
        model.add_layer(layer_id=layer_id, layer_args=layer_args)

    # Build and fit settings are passed together to build_model.
    model.build_model(**kwargs["BuildSettings"], **kwargs["FitSettings"])
    return model

def make_interface(self, interface_type: str, algorithm: str, **kwargs):
interface_type = interface_type.lower()
Expand All @@ -122,5 +140,9 @@ def make_interface(self, interface_type: str, algorithm: str, **kwargs):
return self.__make_scikit_model(algorithm, **kwargs)
case "tensorflow":
return self.__make_tensorflow_model(algorithm, **kwargs)
case "torch":
return self.__make_torch_model(algorithm, **kwargs)

case _:
raise Exception(f"{interface_type} not implemented!")
raise Exception(f"{interface_type} not implemented!")

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pandas import DataFrame
from MaCh3PythonUtils.machine_learning.file_ml_interface import FileMLInterface
from tqdm import tqdm

"""
TODO:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,5 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf

# CODE TO DO TF SPECIFIC PLOTS GOES HERE

return super().evaluate_model(predicted_values, true_values, outfile)
return super().evaluate_model(predicted_values, true_values, outfile)

Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ def train_model(self):
scaled_data = self.scale_data(self._training_data)
scaled_labels = self.scale_labels(self._training_labels)

lr_schedule = tfk.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=10, factor=0.5, min_lr=1e-8, verbose=1)
lr_schedule = tfk.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=10, factor=0.1, min_lr=1e-9, verbose=1)
stop_early = tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20)

self._model.fit(scaled_data, scaled_labels, **self._training_settings, callbacks=[lr_schedule, stop_early])


Expand Down Expand Up @@ -53,12 +52,6 @@ def add_layer(self, layer_id: str, layer_args: dict):
# Hacky, swaps string value of regularizer for proper one
layer_args["kernel_regularizer"] = tfk.regularizers.L2(layer_args["kernel_regularizer"])


self._layers.append(self.__TF_LAYER_IMPLEMENTATIONS[layer_id.lower()](**layer_args))








Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ def build_model(self, **kwargs: dict):
if self._model is None or not self._layers:
raise ValueError("No model can be built! Please setup model and layers")

# Add input layer
self._model.add(tfk.layers.InputLayer(input_shape=(self._chain.ndim-1,)))

for layer in self._layers:
self._model.add(layer)

self._model.build()
optimizer = tfk.optimizers.AdamW(learning_rate=kwargs.get("learning_rate", 1e-5),
weight_decay=1e-4, clipnorm=1.0)
optimizer = tfk.optimizers.Adam(learning_rate=kwargs.get("learning_rate", 1e-5), clipnorm=10.0)

kwargs.pop("learning_rate", None)


self._model.compile(**kwargs, optimizer=optimizer)
self._model.compile(**kwargs, optimizer=optimizer)

Binary file not shown.
Empty file.
Loading