Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 55 additions & 23 deletions tutorials/machine_learning/TMVA_SOFIE_Keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,52 +9,85 @@
### \author Sanjiban Sengupta and Lorenzo Moneta


import contextlib
import warnings

import numpy as np
import ROOT
from tensorflow.keras.layers import Activation, Dense, Input, Softmax
from tensorflow.keras.models import Model

# Enable ROOT in batch mode (same effect as -nodraw)
ROOT.gROOT.SetBatch(True)


@contextlib.contextmanager
def expect_warning(category, message):
    """Silence a known third-party warning and raise if it stops firing.

    Notifies us to drop the workaround once the upstream library is fixed.
    """
    # Record every warning raised inside the managed block.
    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        yield
    # Partition the recorded warnings: the expected one is swallowed,
    # anything else is re-emitted so real problems still surface.
    matched = [
        rec
        for rec in records
        if issubclass(rec.category, category) and message in str(rec.message)
    ]
    for rec in records:
        if rec not in matched:
            warnings.warn_explicit(rec.message, rec.category, rec.filename, rec.lineno)
    if not matched:
        raise RuntimeError(
            f"Expected {category.__name__} containing {message!r} was not "
            "emitted. This tutorial's workaround can probably be removed."
        )


# -----------------------------------------------------------------------------
# Step 1: Create and train a simple Keras model (via embedded Python)
# -----------------------------------------------------------------------------

import numpy as np
from tensorflow.keras.layers import Activation, Dense, Input, Softmax
from tensorflow.keras.models import Model

input=Input(shape=(4,),batch_size=2)
x=Dense(32)(input)
x=Activation('relu')(x)
x=Dense(16,activation='relu')(x)
x=Dense(8,activation='relu')(x)
x=Dense(2)(x)
output=Softmax()(x)
model=Model(inputs=input,outputs=output)
# Build a small fully-connected network: 4 inputs -> 32 -> 16 -> 8 -> 2,
# finished with a Softmax layer.
# NOTE(review): batch_size is pinned to 2 — presumably so SOFIE generates
# code for a fixed batch dimension; confirm against the SOFIE parser docs.
model_input = Input(shape=(4,), batch_size=2)  # renamed: don't shadow the builtin ``input``
x = Dense(32)(model_input)
x = Activation("relu")(x)
x = Dense(16, activation="relu")(x)
x = Dense(8, activation="relu")(x)
x = Dense(2)(x)
output = Softmax()(x)
model = Model(inputs=model_input, outputs=output)

randomGenerator=np.random.RandomState(0)
x_train=randomGenerator.rand(4,4)
y_train=randomGenerator.rand(4,2)
# Deterministic toy training data: four samples with four features each and
# two-component targets, drawn from a fixed-seed RNG for reproducibility.
rng = np.random.RandomState(0)
x_train = rng.rand(4, 4)
y_train = rng.rand(4, 2)

model.compile(loss='mse', optimizer='adam')
model.compile(loss="mse", optimizer="adam")
model.fit(x_train, y_train, epochs=3, batch_size=2)
model.save('KerasModel.keras')

# Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement
# the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a
# DeprecationWarning that we cannot fix from user code.
numpy_major_minor = tuple(int(part) for part in np.__version__.split(".")[:2])
if numpy_major_minor >= (2, 0):
    ctx = expect_warning(
        DeprecationWarning, "__array__ implementation doesn't accept a copy keyword"
    )
else:
    # Older NumPy never emits the warning; use a do-nothing context.
    ctx = contextlib.nullcontext()

with ctx:
    model.save("KerasModel.keras")

model.summary()

# -----------------------------------------------------------------------------
# Step 2: Use TMVA::SOFIE to parse the ONNX model
# -----------------------------------------------------------------------------

import ROOT

# Parse the ONNX model

model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse("KerasModel.keras")

# Generate inference code
model.Generate()
model.OutputGenerated()
#print generated code
# print generated code
print("\n**************************************************")
print(" Generated code")
print("**************************************************\n")
Expand All @@ -69,14 +102,13 @@
# Step 3: Run inference
# -----------------------------------------------------------------------------

#instantiate SOFIE session class
# instantiate SOFIE session class
session = ROOT.TMVA_SOFIE_KerasModel.Session()

# Input tensor (same shape as training input)
x = np.array([[0.1, 0.2, 0.3, 0.4],[0.5, 0.6, 0.7, 0.8]], dtype=np.float32)
x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32)

# Run inference
y = session.infer(x)

print("Inference output:", y)

161 changes: 98 additions & 63 deletions tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
### \author Lorenzo Moneta


import contextlib
import warnings
from os.path import exists

import numpy as np
Expand All @@ -16,82 +18,118 @@
from sklearn.model_selection import train_test_split


def CreateModel(nlayers = 4, nunits = 64):
input = layers.Input(shape=(7,))
x = input
for i in range(1,nlayers) :
y = layers.Dense(nunits, activation='relu')(x)
x = y
@contextlib.contextmanager
def expect_warning(category, message):
    """Silence a known third-party warning and raise if it stops firing.

    Notifies us to drop the workaround once the upstream library is fixed.
    """
    # Record every warning raised inside the managed block.
    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        yield
    # Partition the recorded warnings: the expected one is swallowed,
    # anything else is re-emitted so real problems still surface.
    matched = [
        rec
        for rec in records
        if issubclass(rec.category, category) and message in str(rec.message)
    ]
    for rec in records:
        if rec not in matched:
            warnings.warn_explicit(rec.message, rec.category, rec.filename, rec.lineno)
    if not matched:
        raise RuntimeError(
            f"Expected {category.__name__} containing {message!r} was not "
            "emitted. This tutorial's workaround can probably be removed."
        )

def PrepareData() :
#get the input data
inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"

df1 = ROOT.RDataFrame("sig_tree", inputFile)
sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'])
#print(sigData)
def CreateModel(nlayers=4, nunits=64):
    """Build and compile a dense binary classifier for the 7-feature Higgs data.

    Parameters
    ----------
    nlayers : int
        Total layer count; ``nlayers - 1`` hidden Dense layers are created.
    nunits : int
        Width of each hidden Dense layer.

    Returns
    -------
    A compiled Keras ``Model`` with a single sigmoid output unit.
    """
    inputs = layers.Input(shape=(7,))  # renamed from ``input``: don't shadow the builtin
    x = inputs
    for _ in range(1, nlayers):  # nlayers - 1 hidden layers, as before
        x = layers.Dense(nunits, activation="relu")(x)

    output = layers.Dense(1, activation="sigmoid")(x)
    model = models.Model(inputs, output)
    model.compile(loss="binary_crossentropy", optimizer="adam", weighted_metrics=["accuracy"])
    model.summary()
    return model

# make SOFIE inference on background data
df2 = ROOT.RDataFrame("bkg_tree", inputFile)
bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'])
xbkg = np.column_stack(list(bkgData.values()))
data_bkg_size = xbkg.shape[0]

ysig = np.ones(data_sig_size)
ybkg = np.zeros(data_bkg_size)
inputs_data = np.concatenate((xsig,xbkg),axis=0)
inputs_targets = np.concatenate((ysig,ybkg),axis=0)
def PrepareData():
    """Load the Higgs tutorial signal/background trees and split them.

    Returns ``(x_train, y_train, x_test, y_test)`` with signal events
    labelled 1 and background events labelled 0.
    """
    # Locate the tutorial data file shipped with ROOT.
    inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"
    feature_columns = ["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]

    # Signal events: one numpy array per column, stacked into (nevents, nvars).
    sigData = ROOT.RDataFrame("sig_tree", inputFile).AsNumpy(columns=feature_columns)
    xsig = np.column_stack(list(sigData.values()))
    print("size of data", xsig.shape[0])

    # Background events, same layout.
    bkgData = ROOT.RDataFrame("bkg_tree", inputFile).AsNumpy(columns=feature_columns)
    xbkg = np.column_stack(list(bkgData.values()))

    # Label signal 1 and background 0, then concatenate the two samples.
    inputs_data = np.concatenate((xsig, xbkg), axis=0)
    inputs_targets = np.concatenate((np.ones(xsig.shape[0]), np.zeros(xbkg.shape[0])), axis=0)

    # Split data in training and test data (50/50, fixed seed).
    x_train, x_test, y_train, y_test = train_test_split(
        inputs_data, inputs_targets, test_size=0.50, random_state=1234
    )

    return x_train, y_train, x_test, y_test


#parse the input Keras model into RModel object (force batch size to be 1)
model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile)
def TrainModel(model, x, y, name):
    """Fit ``model`` on ``(x, y)`` and save it as ``<name>.keras``.

    Returns the trained model and the path of the saved file.
    """
    model.fit(x, y, epochs=5, batch_size=50)
    modelFile = name + ".keras"

    # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement
    # the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a
    # DeprecationWarning that we cannot fix from user code.
    numpy_major_minor = tuple(int(part) for part in np.__version__.split(".")[:2])
    if numpy_major_minor >= (2, 0):
        ctx = expect_warning(
            DeprecationWarning, "__array__ implementation doesn't accept a copy keyword"
        )
    else:
        # Older NumPy never emits the warning; use a do-nothing context.
        ctx = contextlib.nullcontext()

    with ctx:
        model.save(modelFile)

    return model, modelFile


def GenerateCode(modelFile="model.keras"):
    """Parse a saved Keras model with SOFIE and generate its inference code.

    Parameters
    ----------
    modelFile : str
        Path to the ``.keras`` file produced by ``TrainModel``.

    Returns
    -------
    str
        The model name (file name without the ``.keras`` extension).

    Raises
    ------
    FileNotFoundError
        If ``modelFile`` does not exist.
    """
    # Check that the input file exists before handing it to the parser.
    if not exists(modelFile):
        # Fixed typo/grammar in the original message ("INput model file not existing").
        raise FileNotFoundError(
            "Input model file does not exist. You need to run "
            "TMVA_Higgs_Classification.C to generate the Keras trained model"
        )

    # Parse the input Keras model into an RModel object (force batch size to be 1).
    model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile)

    # Generate the inference code and write it out.
    model.Generate()
    model.OutputGenerated()

    modelName = modelFile.replace(".keras", "")
    return modelName

###################################################################
## Step 1 : Create and Train model
###################################################################

x_train, y_train, x_test, y_test = PrepareData()
#create dense model with 3 layers of 64 units
model = CreateModel(3,64)
model, modelFile = TrainModel(model,x_train, y_train, 'HiggsModel')
# create dense model with 3 layers of 64 units
model = CreateModel(3, 64)
model, modelFile = TrainModel(model, x_train, y_train, "HiggsModel")

###################################################################
## Step 2 : Parse model and generate inference code with SOFIE
Expand All @@ -110,20 +148,17 @@ def GenerateCode(modelFile = "model.keras") :
## Step 4: Evaluate the model
###################################################################

#get first the SOFIE session namespace
sofie = getattr(ROOT, 'TMVA_SOFIE_' + modelName)
# get first the SOFIE session namespace
sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName)
session = sofie.Session()

x = np.random.normal(0,1,7).astype(np.float32)
x = np.random.normal(0, 1, 7).astype(np.float32)
y = session.infer(x)
ykeras = model(x.reshape(1,7)).numpy()
ykeras = model(x.reshape(1, 7)).numpy()

print("input to model is ",x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0])
print("input to model is ", x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0])

if (abs(y[0]-ykeras[0]) > 0.01) :
raise RuntimeError('ERROR: Result is different between SOFIE and Keras')
if abs(y[0] - ykeras[0]) > 0.01:
raise RuntimeError("ERROR: Result is different between SOFIE and Keras")

print("OK")



Loading
Loading