6565// #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
6666// #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
6767#include " mlir/Dialect/Tensor/Transforms/Passes.h"
68- // #include "mlir/Dialect/Tosa/Transforms/Passes.h"
68+ #include " mlir/Dialect/Tosa/Transforms/Passes.h"
6969// #include "mlir/Dialect/Transform/Transforms/Passes.h"
7070// #include "mlir/Dialect/Vector/Transforms/Passes.h"
7171#include " mlir/Transforms/Passes.h"
7272// #include <mlir/InitAllPasses.h>
7373
7474#include " mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
7575#include " mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
76-
7776#include " mlir/ExecutionEngine/ExecutionEngine.h"
7877#include " mlir/ExecutionEngine/OptUtils.h"
7978#include " mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
8079
80+ #include < llvm/Support/raw_sha1_ostream.h>
81+
8182#include < imex/Dialect/PTensor/IR/PTensorOps.h>
8283#include < imex/InitIMEXDialects.h>
8384#include < imex/InitIMEXPasses.h>
@@ -178,7 +179,6 @@ std::vector<void *> DepManager::store_inputs() {
178179 std::vector<void *> res;
179180 for (auto a : _args) {
180181 auto f = Registry::get (a.first );
181- std::cerr << " store guid " << a.first ;
182182 f.get ().get ()->add_to_args (res, a.second );
183183 _ivm.erase (a.first ); // inputs need no delivery
184184 _icm.erase (a.first );
@@ -254,7 +254,8 @@ uint64_t DepManager::handleResult(::mlir::OpBuilder &builder) {
254254 return 2 * sz;
255255}
256256
257- void DepManager::deliver (intptr_t *output, uint64_t sz) {
257+ void DepManager::deliver (std::vector<intptr_t > &outputV, uint64_t sz) {
258+ auto output = outputV.data ();
258259 size_t pos = 0 ;
259260 for (auto &v : _icm) {
260261 auto rank = _irm[v.first ];
@@ -305,14 +306,30 @@ void DepManager::deliver(intptr_t *output, uint64_t sz) {
305306 }
306307}
307308
308- int JIT::run (::mlir::ModuleOp &module , const std::string &fname,
309- std::vector<void *> &inp, intptr_t *out) {
310- // lower to LLVM
311- if (::mlir::failed (_pm.run (module )))
312- throw std::runtime_error (" failed to run pass manager" );
313-
314- if (_verbose)
315- module .dump ();
309+ std::vector<intptr_t > JIT::run (::mlir::ModuleOp &module ,
310+ const std::string &fname,
311+ std::vector<void *> &inp, size_t osz) {
312+ if (_useCache) {
313+ ::mlir::ModuleOp cached;
314+ static std::vector<
315+ std::pair<std::array<unsigned char , 20 >, ::mlir::ModuleOp>>
316+ cache;
317+ llvm::raw_sha1_ostream xxx;
318+ module ->print (xxx);
319+ auto cksm = xxx.sha1 ();
320+ for (auto x : cache) {
321+ if (x.first == cksm) {
322+ cached = x.second ;
323+ break ;
324+ }
325+ }
326+ if (cached) {
327+ module = cached;
328+ std::cerr << " using cached module" << std::endl;
329+ } else {
330+ cache.push_back (std::make_pair (cksm, module ));
331+ }
332+ }
316333
317334 // An optimization pipeline to use within the execution engine.
318335 auto optPipeline =
@@ -322,21 +339,27 @@ int JIT::run(::mlir::ModuleOp &module, const std::string &fname,
322339
323340 // Create an ::mlir execution engine. The execution engine eagerly
324341 // JIT-compiles the module.
325- ::mlir::ExecutionEngineOptions engineOptions;
326- engineOptions.transformer = optPipeline;
327- // const char * crunner = getenv("DDPT_CRUNNER_SO");
328- // crunner = crunner ? crunner : "libmlir_c_runner_utils.so";
329- const char *idtr = getenv (" DDPT_IDTR_SO" );
330- idtr = idtr ? idtr : " libidtr.so" ;
331- // ::llvm::ArrayRef<::llvm::StringRef> shlibs = {crunner, idtr};
332- engineOptions.sharedLibPaths = {idtr};
333- auto maybeEngine = ::mlir::ExecutionEngine::create (module , engineOptions);
342+ ::mlir::ExecutionEngineOptions opts;
343+ opts.transformer = optPipeline;
344+ opts.sharedLibPaths = {_sharedLibPaths};
345+ opts.enableObjectDump = _useCache;
346+
347+ // lower to LLVM
348+ if (::mlir::failed (_pm.run (module )))
349+ throw std::runtime_error (" failed to run pass manager" );
350+
351+ if (_verbose)
352+ module .dump ();
353+
354+ auto maybeEngine = ::mlir::ExecutionEngine::create (module , opts);
334355 assert (maybeEngine && " failed to construct an execution engine" );
335356 auto &engine = maybeEngine.get ();
336357
337358 llvm::SmallVector<void *> args;
359+ std::vector<intptr_t > out (osz);
360+ auto tmp = out.data ();
338361 // first arg must be the result ptr
339- args.push_back (&out );
362+ args.push_back (&tmp );
340363 // we need a void*& for every input tensor
341364 // we refer directly to the storage in inp
342365 for (auto &arg : inp) {
@@ -350,7 +373,7 @@ int JIT::run(::mlir::ModuleOp &module, const std::string &fname,
350373 throw std::runtime_error (" JIT invocation failed" );
351374 }
352375
353- return 0 ;
376+ return out ;
354377}
355378
356379static const char *pass_pipeline =
@@ -362,11 +385,13 @@ static const char *pass_pipeline =
362385 // "builtin.module(func.func(ptensor-dist),convert-dist-to-standard,convert-ptensor-to-linalg,arith-bufferize,func.func(empty-tensor-to-alloc-tensor,scf-bufferize,linalg-bufferize,tensor-bufferize,bufferization-bufferize),func-bufferize,func.func(finalizing-bufferize,convert-linalg-to-parallel-loops),canonicalize,fold-memref-alias-ops,expand-strided-metadata,lower-affine,convert-scf-to-cf,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)";
363386 : " func.func(ptensor-dist,dist-coalesce),convert-dist-to-standard,"
364387 " convert-ptensor-to-linalg,canonicalize,convert-shape-to-std,arith-"
365- " expand,canonicalize,arith-bufferize,func-bufferize,func.func(empty-"
366- " tensor-to-alloc-tensor,scf-bufferize,tensor-bufferize,linalg-"
388+ " expand,canonicalize,arith-bufferize,func-bufferize,func.func(tosa-"
389+ " to-linalg,"
390+ " empty-tensor-to-alloc-tensor,scf-bufferize,tensor-bufferize,linalg-"
367391 " bufferize,bufferization-bufferize,linalg-detensorize,tensor-"
368392 " bufferize,finalizing-bufferize,convert-linalg-to-parallel-loops),"
369- " canonicalize,fold-memref-alias-ops,expand-strided-metadata,lower-"
393+ " canonicalize,fold-memref-alias-ops,expand-strided-metadata,convert-"
394+ " math-to-funcs,convert-math-to-libm,lower-"
370395 " affine,convert-scf-to-cf,convert-memref-to-llvm,convert-func-to-"
371396 " llvm,reconcile-unrealized-casts" ;
372397JIT::JIT ()
@@ -391,12 +416,27 @@ JIT::JIT()
391416 if (v == " 1" || v == " y" || v == " Y" || v == " on" || v == " ON" )
392417 _verbose = true ;
393418 }
419+ _pm.enableTiming ();
394420 // some verbosity
395421 if (_verbose) {
396422 _pm.enableStatistics ();
397423 _pm.enableIRPrinting ();
398424 _pm.dump ();
399425 }
426+
427+ const char *envptr = getenv (" DDPT_USE_CACHE" );
428+ envptr = envptr ? envptr : " 1" ;
429+ {
430+ auto c = std::string (envptr);
431+ _useCache = c == " 1" || c == " y" || c == " Y" || c == " on" || c == " ON" ;
432+ std::cerr << " enableObjectDump=" << _useCache << std::endl;
433+ }
434+
435+ // const char * crunner = getenv("DDPT_CRUNNER_SO");
436+ // crunner = crunner ? crunner : "libmlir_c_runner_utils.so";
437+ envptr = getenv (" DDPT_IDTR_SO" );
438+ _sharedLibPaths = envptr ? envptr : " libidtr.so" ;
439+ // ::llvm::ArrayRef<::llvm::StringRef> shlibs = {crunner, envptr};
400440}
401441
402442// register dialects and passes
@@ -411,6 +451,10 @@ void init() {
411451 ::mlir::registerConvertShapeToStandardPass ();
412452 ::mlir::tensor::registerTensorPasses ();
413453 ::mlir::registerLinalgPasses ();
454+ ::mlir::registerTosaToLinalg ();
455+ ::mlir::registerConvertMathToFuncs ();
456+ ::mlir::registerConvertMathToLibm ();
457+ ::mlir::tosa::registerTosaOptPasses ();
414458 ::mlir::func::registerFuncPasses ();
415459 ::mlir::registerConvertFuncToLLVMPass ();
416460 ::mlir::bufferization::registerBufferizationPasses ();
0 commit comments