Skip to content

Commit 49cf1a0

Browse files
committed
Add LLVMCodeBuilder class
1 parent 39c3fb5 commit 49cf1a0

File tree

8 files changed

+559
-0
lines changed

8 files changed

+559
-0
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ option(LIBSCRATCHCPP_BUILD_UNIT_TESTS "Build unit tests" ON)
1010
option(LIBSCRATCHCPP_NETWORK_SUPPORT "Support for downloading projects" ON)
1111
option(LIBSCRATCHCPP_COMPUTED_GOTO "Support for computed goto" ON)
1212
option(LIBSCRATCHCPP_USE_LLVM "Compile scripts to LLVM IR (work in progress)" OFF)
13+
option(LIBSCRATCHCPP_PRINT_LLVM_IR "Print LLVM IR of compiled Scratch scripts (for debugging)" OFF)
1314

1415
if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
1516
# Computed goto not supported on anything except GCC
@@ -73,6 +74,9 @@ if (LIBSCRATCHCPP_USE_LLVM)
7374
include/scratchcpp/dev/executioncontext.h
7475
)
7576

77+
# Define PRINT_LLVM_IR for the library sources when IR printing was requested
if (LIBSCRATCHCPP_PRINT_LLVM_IR)
    target_compile_definitions(scratchcpp PRIVATE PRINT_LLVM_IR)
endif()
7680
else()
7781
target_sources(scratchcpp
7882
PUBLIC

src/dev/engine/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ target_sources(scratchcpp
44
executioncontext_p.cpp
55
executioncontext_p.h
66
internal/icodebuilder.h
7+
internal/llvmcodebuilder.cpp
8+
internal/llvmcodebuilder.h
79
internal/llvmexecutablecode.cpp
810
internal/llvmexecutablecode.h
911
internal/llvmexecutioncontext.cpp
Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
#include <llvm/Support/TargetSelect.h>
4+
#include <llvm/IR/Verifier.h>
5+
#include <llvm/ExecutionEngine/Orc/LLJIT.h>
6+
#include <scratchcpp/value.h>
7+
8+
#include "llvmcodebuilder.h"
9+
#include "llvmexecutablecode.h"
10+
11+
using namespace libscratchcpp;
12+
13+
// Creates a code builder for the script with the given ID.
// The ID is stored and also used as the name of the LLVM module.
LLVMCodeBuilder::LLVMCodeBuilder(const std::string &id) :
    m_id(id),
    m_module(std::make_unique<llvm::Module>(id, m_ctx)),
    m_builder(m_ctx)
{
    // Register the native target together with its assembly printer and parser
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();
    llvm::InitializeNativeTargetAsmParser();

    // The first function starts with empty lists of constant values and registers
    m_constValues.emplace_back();
    m_regs.emplace_back();

    initTypes();
}
26+
27+
// Frees the contents of all constant values which are still held by the builder.
LLVMCodeBuilder::~LLVMCodeBuilder()
{
    for (const auto &functionValues : m_constValues) {
        for (const auto &constValue : functionValues) {
            // Null entries have nothing to free
            if (!constValue)
                continue;

            value_free(constValue.get());
        }
    }
}
36+
37+
std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
38+
{
39+
size_t functionIndex = 0;
40+
llvm::Function *currentFunc = beginFunction(functionIndex);
41+
42+
// Execute recorded steps
43+
for (const Step &step : m_steps) {
44+
switch (step.type) {
45+
case Step::Type::FunctionCall: {
46+
std::vector<llvm::Type *> types;
47+
std::vector<llvm::Value *> args;
48+
49+
// Add target pointer arg
50+
assert(currentFunc->arg_size() == 1);
51+
types.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(m_ctx), 0));
52+
args.push_back(currentFunc->getArg(0));
53+
54+
// Add return value arg if the function returns
55+
if (step.functionReturns) {
56+
types.push_back(m_valueDataType->getPointerTo());
57+
args.push_back(m_regs[functionIndex][step.functionReturnRegIndex]->value);
58+
}
59+
60+
// Args
61+
for (auto &arg : step.args) {
62+
types.push_back(m_valueDataType->getPointerTo());
63+
args.push_back(arg->value);
64+
}
65+
66+
m_builder.CreateCall(resolveFunction(step.functionName, llvm::FunctionType::get(m_builder.getVoidTy(), types, false)), args);
67+
break;
68+
}
69+
70+
case Step::Type::Yield:
71+
endFunction(currentFunc, functionIndex);
72+
currentFunc = beginFunction(++functionIndex);
73+
break;
74+
}
75+
}
76+
77+
endFunction(currentFunc, functionIndex);
78+
79+
#ifdef PRINT_LLVM_IR
80+
std::cout << std::endl << "=== LLVM IR (" << m_module->getName().str() << ") ===" << std::endl;
81+
m_module->print(llvm::outs(), nullptr);
82+
std::cout << "==============" << std::endl << std::endl;
83+
#endif
84+
85+
std::vector<std::unique_ptr<ValueData>> constValues;
86+
87+
for (auto &values : m_constValues) {
88+
for (auto &v : values)
89+
constValues.push_back(std::move(v));
90+
}
91+
92+
m_constValues.clear();
93+
return std::make_shared<LLVMExecutableCode>(std::move(m_module), constValues);
94+
}
95+
96+
void LLVMCodeBuilder::addFunctionCall(const std::string &functionName, int argCount, bool returns)
97+
{
98+
Step step(Step::Type::FunctionCall);
99+
step.functionName = functionName;
100+
101+
assert(m_tmpRegs.size() >= argCount);
102+
103+
for (size_t i = m_tmpRegs.size() - argCount; i < m_tmpRegs.size(); i++)
104+
step.args.push_back(m_tmpRegs[i]);
105+
106+
m_tmpRegs.erase(m_tmpRegs.end() - argCount, m_tmpRegs.end());
107+
108+
if (returns) {
109+
step.functionReturns = true;
110+
auto reg = std::make_shared<Register>();
111+
step.functionReturnRegIndex = m_regs[m_currentFunction].size();
112+
m_regs[m_currentFunction].push_back(reg);
113+
m_tmpRegs.push_back(reg);
114+
}
115+
116+
m_steps.push_back(step);
117+
}
118+
119+
void LLVMCodeBuilder::addConstValue(const Value &value)
120+
{
121+
auto reg = std::make_shared<Register>();
122+
reg->isConstValue = true;
123+
reg->constValueIndex = m_constValues[m_currentFunction].size();
124+
m_regs[m_currentFunction].push_back(reg);
125+
m_tmpRegs.push_back(reg);
126+
127+
std::unique_ptr<ValueData> v = std::make_unique<ValueData>();
128+
value_init(v.get());
129+
value_assign_copy(v.get(), &value.data());
130+
m_constValues[m_currentFunction].push_back(std::move(v));
131+
}
132+
133+
// Placeholder: currently does nothing with the given variable.
void LLVMCodeBuilder::addVariableValue(Variable *variable)
{
    // TODO: Not implemented yet
}
136+
137+
// Placeholder: currently does nothing with the given list.
void LLVMCodeBuilder::addListContents(List *list)
{
    // TODO: Not implemented yet
}
140+
141+
// Placeholder: currently does nothing.
void LLVMCodeBuilder::beginIfStatement()
{
    // TODO: Not implemented yet
}
144+
145+
// Placeholder: currently does nothing.
void LLVMCodeBuilder::beginElseBranch()
{
    // TODO: Not implemented yet
}
148+
149+
// Placeholder: currently does nothing.
void LLVMCodeBuilder::endIf()
{
    // TODO: Not implemented yet
}
152+
153+
// Placeholder: currently does nothing.
void LLVMCodeBuilder::beginLoop()
{
    // TODO: Not implemented yet
}
156+
157+
// Placeholder: currently does nothing.
void LLVMCodeBuilder::endLoop()
{
    // TODO: Not implemented yet
}
160+
161+
void LLVMCodeBuilder::yield()
162+
{
163+
m_steps.push_back({ Step::Type::Yield });
164+
m_currentFunction++;
165+
166+
assert(m_currentFunction == m_constValues.size());
167+
m_constValues.push_back({});
168+
169+
assert(m_currentFunction == m_regs.size());
170+
m_regs.push_back({});
171+
}
172+
173+
void LLVMCodeBuilder::initTypes()
174+
{
175+
// Create the ValueData struct
176+
llvm::Type *intType = llvm::Type::getInt64Ty(m_ctx); // long (intValue)
177+
llvm::Type *doubleType = llvm::Type::getDoubleTy(m_ctx); // double (doubleValue)
178+
llvm::Type *boolType = llvm::Type::getInt1Ty(m_ctx); // bool (boolValue)
179+
llvm::Type *stringPtrType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_ctx), 0); // char* (stringValue)
180+
181+
// Create the union type (largest type size should dominate)
182+
llvm::StructType *unionType = llvm::StructType::create(m_ctx, "union");
183+
unionType->setBody({ intType, doubleType, boolType, stringPtrType });
184+
185+
// Create the full struct type
186+
llvm::Type *valueType = llvm::Type::getInt32Ty(m_ctx); // Assuming ValueType is a 32-bit enum
187+
llvm::Type *sizeType = llvm::Type::getInt64Ty(m_ctx); // size_t
188+
189+
// Combine them into the full struct
190+
m_valueDataType = llvm::StructType::create(m_ctx, "ValueData");
191+
m_valueDataType->setBody({ unionType, valueType, sizeType });
192+
}
193+
194+
// Creates the function "f<index>" in the module, moves the builder to its entry block
// and assigns an LLVM value to every register of the function:
// constant registers point to their constant value, other registers get stack space
// which is initialized using value_init().
llvm::Function *LLVMCodeBuilder::beginFunction(size_t index)
{
    // size_t f#(Target *)
    llvm::Type *targetPtrType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_ctx), 0);
    llvm::FunctionType *signature = llvm::FunctionType::get(m_builder.getInt64Ty(), targetPtrType, false);
    const std::string funcName = "f" + std::to_string(index);
    llvm::Function *func = llvm::Function::Create(signature, llvm::Function::ExternalLinkage, funcName, m_module.get());

    m_builder.SetInsertPoint(llvm::BasicBlock::Create(m_ctx, "entry", func));

    // Turn the addresses of the constant values into ValueData pointers
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    std::vector<llvm::Value *> constPtrs;

    for (const auto &constValue : m_constValues[index]) {
        llvm::Value *address = m_builder.getInt64(reinterpret_cast<uintptr_t>(constValue.get()));
        constPtrs.push_back(m_builder.CreateIntToPtr(address, valuePtrType));
    }

    // Assign values to registers
    size_t allocatedRegs = 0;

    for (auto &reg : m_regs[index]) {
        if (reg->isConstValue) {
            // Constant values already exist, so no space is allocated for them
            reg->value = constPtrs[reg->constValueIndex];
            continue;
        }

        // Only allocated registers are counted in the "r<N>" names
        const std::string regName = "r" + std::to_string(allocatedRegs);
        llvm::Value *slot = m_builder.CreateAlloca(m_valueDataType, nullptr, regName);
        m_builder.CreateCall(resolve_value_init(), { slot });

        reg->value = slot;
        allocatedRegs++;
    }

    return func;
}
234+
235+
// Finishes the given function: warns if there are temporary registers left,
// adds a return of the next function index (index + 1) and runs the LLVM verifier on the function.
void LLVMCodeBuilder::endFunction(llvm::Function *func, size_t index)
{
    if (!m_tmpRegs.empty()) {
        std::cout
            << "warning: " << m_tmpRegs.size() << " registers were leaked by script '" << m_module->getName().str() << "', function '" << func->getName().str()
            << "' (if you see this as a regular user, this is a bug and should be reported)" << std::endl;
    }

    // Return next function index
    m_builder.CreateRet(m_builder.getInt64(index + 1));

    // The details found by the verifier are written to llvm::errs()
    if (llvm::verifyFunction(*func, &llvm::errs())) {
        llvm::errs() << "error: LLVM function verification failed!\n";
        llvm::errs() << "script hat ID: " << m_id << "\n";
        // Print the StringRef itself, data() is not guaranteed to be null-terminated
        llvm::errs() << "function name: " << func->getName() << "\n";
    }
}
252+
253+
// Looks up the function with the given name and type in the module (inserting it if needed)
// and returns its callee.
llvm::FunctionCallee LLVMCodeBuilder::resolveFunction(const std::string name, llvm::FunctionType *type)
{
    llvm::FunctionCallee callee = m_module->getOrInsertFunction(name, type);
    return callee;
}
257+
258+
// Resolves value_init as void(ValueData *)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_init()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), valuePtrType, false);
    return resolveFunction("value_init", funcType);
}
262+
263+
// Resolves value_free as void(ValueData *)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_free()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), valuePtrType, false);
    return resolveFunction("value_free", funcType);
}
267+
268+
// Resolves value_assign_long as void(ValueData *, 64-bit integer)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_long()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::Type *longType = m_builder.getInt64Ty();
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), { valuePtrType, longType }, false);
    return resolveFunction("value_assign_long", funcType);
}
272+
273+
// Resolves value_assign_double as void(ValueData *, double)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_double()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::Type *doubleType = m_builder.getDoubleTy();
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), { valuePtrType, doubleType }, false);
    return resolveFunction("value_assign_double", funcType);
}
277+
278+
// Resolves value_assign_bool as void(ValueData *, 1-bit integer)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_bool()
{
    // The symbol name must match this resolver: looking up "value_assign_double" here
    // would request the double overload with a mismatching (bool) parameter type
    // NOTE(review): the bool parameter is declared as i1 without a zeroext attribute - confirm this matches the C ABI of value_assign_bool
    return resolveFunction("value_assign_bool", llvm::FunctionType::get(m_builder.getVoidTy(), { m_valueDataType->getPointerTo(), m_builder.getInt1Ty() }, false));
}
282+
283+
// Resolves value_assign_cstring as void(ValueData *, pointer to 8-bit integer)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_cstring()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::Type *cstringType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_ctx), 0);
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), { valuePtrType, cstringType }, false);
    return resolveFunction("value_assign_cstring", funcType);
}
287+
288+
// Resolves value_assign_special as void(ValueData *, 32-bit integer)
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_special()
{
    llvm::Type *valuePtrType = m_valueDataType->getPointerTo();
    llvm::Type *specialType = m_builder.getInt32Ty();
    llvm::FunctionType *funcType = llvm::FunctionType::get(m_builder.getVoidTy(), { valuePtrType, specialType }, false);
    return resolveFunction("value_assign_special", funcType);
}

0 commit comments

Comments
 (0)