// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.

using System;
using System.Runtime.InteropServices;
using TorchSharp.PInvoke;
using static TorchSharp.PInvoke.NativeMethods;

namespace TorchSharp
{
    public static partial class torch
    {
        public static partial class export
        {
            /// <summary>
            /// Load a PyTorch ExportedProgram from a .pt2 file compiled with AOTInductor.
            /// </summary>
            /// <param name="filename">Path to the .pt2 file</param>
            /// <returns>ExportedProgram model for inference</returns>
            /// <remarks>
            /// IMPORTANT: The .pt2 file must be compiled with torch._inductor.aoti_compile_and_package() in Python.
            /// Models saved with torch.export.save() alone will NOT work - they require AOTInductor compilation.
            ///
            /// This implementation is INFERENCE-ONLY. Training, parameter updates, and device movement
            /// are not supported. The model is compiled for a specific device (CPU/CUDA) at compile time.
            ///
            /// Example Python code to create compatible .pt2 files:
            /// <code>
            /// import torch
            /// import torch._inductor
            ///
            /// # Export the model
            /// exported = torch.export.export(model, example_inputs)
            ///
            /// # Compile with AOTInductor (required for C++ loading)
            /// torch._inductor.aoti_compile_and_package(
            ///     exported,
            ///     package_path="model.pt2"
            /// )
            /// </code>
            /// </remarks>
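            /// <example>
            /// Loading and running the compiled model from C# (a minimal sketch; the
            /// file name and input shape are placeholders):
            /// <code>
            /// using var model = torch.export.load("model.pt2");
            /// using var input = torch.randn(1, 3, 224, 224);
            /// var outputs = model.run(input);
            /// </code>
            /// </example>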
            public static ExportedProgram load(string filename)
            {
                return new ExportedProgram(filename);
            }

            /// <summary>
            /// Load a PyTorch ExportedProgram whose run() returns a typed result instead of a raw tensor array.
            /// </summary>
            /// <typeparam name="TResult">The return type: Tensor, Tensor[], or a ValueTuple of two or three Tensors.</typeparam>
            /// <param name="filename">Path to the .pt2 file</param>
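            /// <example>
            /// A minimal sketch of typed loading for a single-output model ("model.pt2"
            /// and the input tensor are placeholders):
            /// <code>
            /// using var model = torch.export.load&lt;torch.Tensor&gt;("model.pt2");
            /// torch.Tensor logits = model.run(input);
            /// </code>
            /// </example>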
            public static ExportedProgram<TResult> load<TResult>(string filename)
            {
                return new ExportedProgram<TResult>(filename);
            }
        }
    }

    /// <summary>
    /// Represents a PyTorch ExportedProgram loaded from an AOTInductor-compiled .pt2 file.
    /// This is an INFERENCE-ONLY implementation - training and parameter updates are not supported.
    /// </summary>
    /// <remarks>
    /// Unlike TorchScript models, ExportedProgram models are ahead-of-time (AOT) compiled for
    /// a specific device and are optimized for inference performance; in many cases they deliver
    /// 30-40% lower latency than comparable TorchScript models.
    ///
    /// Key limitations:
    /// - Inference only (no training, no gradients)
    /// - No parameter access or updates
    /// - No device movement (compiled for a specific device)
    /// - No dynamic model structure changes
    ///
    /// Use torch.jit for models that require training or dynamic behavior.
    /// </remarks>
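    /// <example>
    /// Typical usage (a minimal sketch; file name and shapes are placeholders).
    /// The using declarations release the native handles deterministically:
    /// <code>
    /// using var model = torch.export.load("model.pt2");
    /// using var input = torch.ones(1, 10);
    /// var outputs = model.run(input);
    /// </code>
    /// </example>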
    public class ExportedProgram : IDisposable
    {
        private IntPtr handle;
        private bool _disposed = false;

        internal ExportedProgram(string filename)
        {
            handle = THSExport_load(filename);
            if (handle == IntPtr.Zero)
                torch.CheckForErrors();
        }

        /// <summary>
        /// Run inference on the model with the given input tensors.
        /// </summary>
        /// <param name="inputs">Input tensors for the model</param>
        /// <returns>Array of output tensors</returns>
        /// <remarks>
        /// The number and shapes of inputs must match what the model was exported with.
        /// All inputs must be on the same device that the model was compiled for.
        /// </remarks>
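        /// <example>
        /// A sketch of a two-input call (tensor names are hypothetical; the exported
        /// model determines the actual number and shapes of inputs):
        /// <code>
        /// var outputs = model.run(tokens, attentionMask);
        /// var logits = outputs[0];
        /// </code>
        /// </example>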
        public torch.Tensor[] run(params torch.Tensor[] inputs)
        {
            if (_disposed)
                throw new ObjectDisposedException(nameof(ExportedProgram));
            if (inputs is null)
                throw new ArgumentNullException(nameof(inputs));

            // Convert managed tensors to an array of native handles.
            IntPtr[] input_handles = new IntPtr[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                input_handles[i] = inputs[i].Handle;
            }

            // Call the native run method.
            THSExport_Module_run(handle, input_handles, inputs.Length, out IntPtr result_ptr, out int result_length);
            torch.CheckForErrors();

            // Marshal the returned handle array into managed tensors.
            torch.Tensor[] results = new torch.Tensor[result_length];
            IntPtr[] result_handles = new IntPtr[result_length];
            Marshal.Copy(result_ptr, result_handles, 0, result_length);

            for (int i = 0; i < result_length; i++)
            {
                results[i] = new torch.Tensor(result_handles[i]);
            }

            // Free the native array itself; the tensors it pointed to are now owned by
            // the managed Tensor objects. This assumes the native side allocated the
            // handle array with the HGlobal allocator.
            Marshal.FreeHGlobal(result_ptr);

            return results;
        }

        /// <summary>
        /// Synonym for run() - executes the forward pass.
        /// </summary>
        public torch.Tensor[] forward(params torch.Tensor[] inputs) => run(inputs);

        /// <summary>
        /// Synonym for run() - executes the model.
        /// </summary>
        public torch.Tensor[] call(params torch.Tensor[] inputs) => run(inputs);

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (handle != IntPtr.Zero)
                {
                    THSExport_Module_dispose(handle);
                    handle = IntPtr.Zero;
                }
                _disposed = true;
            }
        }

        ~ExportedProgram()
        {
            Dispose(false);
        }
    }

    /// <summary>
    /// Generic version of ExportedProgram with a typed output.
    /// </summary>
    /// <typeparam name="TResult">The return type: Tensor, Tensor[], or a ValueTuple of two or three Tensors</typeparam>
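    /// <example>
    /// A sketch of a model with two outputs, unpacked as a tuple ("model.pt2" and
    /// the input tensor are placeholders):
    /// <code>
    /// using var model = torch.export.load&lt;(torch.Tensor, torch.Tensor)&gt;("model.pt2");
    /// var (logits, hidden) = model.run(input);
    /// </code>
    /// </example>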
    public class ExportedProgram<TResult> : ExportedProgram
    {
        internal ExportedProgram(string filename) : base(filename)
        {
        }

        /// <summary>
        /// Run inference with a typed return value.
        /// </summary>
        public new TResult run(params torch.Tensor[] inputs)
        {
            var results = base.run(inputs);

            // Single-tensor result.
            if (typeof(TResult) == typeof(torch.Tensor))
            {
                if (results.Length != 1)
                    throw new InvalidOperationException($"Expected 1 output tensor, got {results.Length}");
                return (TResult)(object)results[0];
            }

            // Raw tensor-array result.
            if (typeof(TResult) == typeof(torch.Tensor[]))
            {
                return (TResult)(object)results;
            }

            // Tuple results: construct the ValueTuple through its constructor via reflection.
            if (typeof(TResult).IsGenericType)
            {
                var genericType = typeof(TResult).GetGenericTypeDefinition();
                if (genericType == typeof(ValueTuple<,>))
                {
                    if (results.Length != 2)
                        throw new InvalidOperationException($"Expected 2 output tensors, got {results.Length}");
                    return (TResult)Activator.CreateInstance(typeof(TResult), results[0], results[1]);
                }
                if (genericType == typeof(ValueTuple<,,>))
                {
                    if (results.Length != 3)
                        throw new InvalidOperationException($"Expected 3 output tensors, got {results.Length}");
                    return (TResult)Activator.CreateInstance(typeof(TResult), results[0], results[1], results[2]);
                }
            }

            throw new NotSupportedException($"Return type {typeof(TResult)} is not supported");
        }

        public new TResult forward(params torch.Tensor[] inputs) => run(inputs);
        public new TResult call(params torch.Tensor[] inputs) => run(inputs);
    }
}