Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,75 @@
[submodule "LLVM/extern/Serde"]
path = LLVM/extern/Serde
url = https://github.com/AntoineBastide47/Serde.git
[submodule "LLVM/extern/tree-sitter/tree-sitter"]
path = LLVM/extern/tree-sitter/tree-sitter
url = https://github.com/tree-sitter/tree-sitter.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-javascript"]
path = LLVM/extern/tree-sitter/tree-sitter-javascript
url = https://github.com/tree-sitter/tree-sitter-javascript.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-typescript"]
path = LLVM/extern/tree-sitter/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-java"]
path = LLVM/extern/tree-sitter/tree-sitter-java
url = https://github.com/tree-sitter/tree-sitter-java.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-c-sharp"]
path = LLVM/extern/tree-sitter/tree-sitter-c-sharp
url = https://github.com/tree-sitter/tree-sitter-c-sharp.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-rust"]
path = LLVM/extern/tree-sitter/tree-sitter-rust
url = https://github.com/tree-sitter/tree-sitter-rust.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-ruby"]
path = LLVM/extern/tree-sitter/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-php"]
path = LLVM/extern/tree-sitter/tree-sitter-php
url = https://github.com/tree-sitter/tree-sitter-php.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-perl"]
path = LLVM/extern/tree-sitter/tree-sitter-perl
url = https://github.com/tree-sitter-perl/tree-sitter-perl.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-lua"]
path = LLVM/extern/tree-sitter/tree-sitter-lua
url = https://github.com/tree-sitter-grammars/tree-sitter-lua.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-swift"]
path = LLVM/extern/tree-sitter/tree-sitter-swift
url = https://github.com/alex-pinkus/tree-sitter-swift.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-kotlin"]
path = LLVM/extern/tree-sitter/tree-sitter-kotlin
url = https://github.com/fwcd/tree-sitter-kotlin.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-r"]
path = LLVM/extern/tree-sitter/tree-sitter-r
url = https://github.com/r-lib/tree-sitter-r.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-scala"]
path = LLVM/extern/tree-sitter/tree-sitter-scala
url = https://github.com/tree-sitter/tree-sitter-scala.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-haskell"]
path = LLVM/extern/tree-sitter/tree-sitter-haskell
url = https://github.com/tree-sitter/tree-sitter-haskell.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-ocaml"]
path = LLVM/extern/tree-sitter/tree-sitter-ocaml
url = https://github.com/tree-sitter/tree-sitter-ocaml.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-erlang"]
path = LLVM/extern/tree-sitter/tree-sitter-erlang
url = https://github.com/WhatsApp/tree-sitter-erlang.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-elixir"]
path = LLVM/extern/tree-sitter/tree-sitter-elixir
url = https://github.com/elixir-lang/tree-sitter-elixir.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-dart"]
path = LLVM/extern/tree-sitter/tree-sitter-dart
url = https://github.com/UserNobody14/tree-sitter-dart.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-objc"]
path = LLVM/extern/tree-sitter/tree-sitter-objc
url = https://github.com/tree-sitter-grammars/tree-sitter-objc.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-glsl"]
path = LLVM/extern/tree-sitter/tree-sitter-glsl
url = https://github.com/tree-sitter-grammars/tree-sitter-glsl.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-hlsl"]
path = LLVM/extern/tree-sitter/tree-sitter-hlsl
url = https://github.com/tree-sitter-grammars/tree-sitter-hlsl.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-go"]
path = LLVM/extern/tree-sitter/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go.git
[submodule "LLVM/extern/tree-sitter/tree-sitter-python"]
path = LLVM/extern/tree-sitter/tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git
3 changes: 2 additions & 1 deletion LLVM/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ build-test
./ignore
./include/LanguageModel.generated.hpp
result/**
train/__pycache__
train/__pycache__
repos/**
38 changes: 15 additions & 23 deletions LLVM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ add_executable(${PROJECT_NAME}
include/LanguageStats.hpp
include/LanguageModel.generated.hpp
src/LanguageParser.cpp
src/TreeSitterParser.cpp
include/LanguageParser.hpp
include/LanguageData.hpp
include/TreeSitterParser.hpp
)

add_executable(LanguageClassifierTests
Expand Down Expand Up @@ -91,8 +93,10 @@ add_executable(ParserFixtureTests
include/LanguageStats.hpp
include/LanguageModel.generated.hpp
src/LanguageParser.cpp
src/TreeSitterParser.cpp
include/LanguageParser.hpp
include/LanguageData.hpp
include/TreeSitterParser.hpp
)
target_include_directories(ParserFixtureTests PRIVATE include)
target_link_libraries(ParserFixtureTests clangLex clangBasic)
Expand Down Expand Up @@ -135,8 +139,10 @@ add_executable(DebugLanguageOutputTests
include/LanguageStats.hpp
include/LanguageModel.generated.hpp
src/LanguageParser.cpp
src/TreeSitterParser.cpp
include/LanguageParser.hpp
include/LanguageData.hpp
include/TreeSitterParser.hpp
)
target_include_directories(DebugLanguageOutputTests PRIVATE include)
target_link_libraries(DebugLanguageOutputTests clangLex clangBasic)
Expand Down Expand Up @@ -173,8 +179,10 @@ add_executable(StringLiteralDecodeTests
include/LanguageStats.hpp
include/LanguageModel.generated.hpp
src/LanguageParser.cpp
src/TreeSitterParser.cpp
include/LanguageParser.hpp
include/LanguageData.hpp
include/TreeSitterParser.hpp
)
target_include_directories(StringLiteralDecodeTests PRIVATE include)
target_link_libraries(StringLiteralDecodeTests clangLex clangBasic)
Expand All @@ -192,29 +200,6 @@ find_package(Clang REQUIRED CONFIG)

include_directories(${LLVM_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} clangLex clangBasic)
target_compile_definitions(${PROJECT_NAME} PRIVATE MATCHERTEXT_PARSERS_DIR="${CMAKE_SOURCE_DIR}/parsers")

# === Go parser binary ===
# Pre-compile parsers/parser.go into a native binary so each per-file invocation
# avoids recompiling, and dodges `go run`'s rule that disallows .go arguments
# from a different directory than the source file.
find_program(GO_EXECUTABLE go)
if (GO_EXECUTABLE)
set(GO_PARSER_BIN "${CMAKE_BINARY_DIR}/matchertext_go_parser")
add_custom_command(
OUTPUT ${GO_PARSER_BIN}
COMMAND ${GO_EXECUTABLE} build -o ${GO_PARSER_BIN} ${CMAKE_SOURCE_DIR}/parsers/parser.go
DEPENDS ${CMAKE_SOURCE_DIR}/parsers/parser.go
COMMENT "Building Go parser binary"
VERBATIM
)
add_custom_target(go_parser ALL DEPENDS ${GO_PARSER_BIN})
add_dependencies(${PROJECT_NAME} go_parser)
target_compile_definitions(${PROJECT_NAME} PRIVATE
MATCHERTEXT_GO_PARSER_BIN="${GO_PARSER_BIN}")
else ()
message(WARNING "go executable not found; Go parsing will be disabled at runtime")
endif ()

# === OpenMP ===
find_package(OpenMP QUIET)
Expand All @@ -234,6 +219,13 @@ target_link_libraries(ParserFixtureTests Serde)
target_link_libraries(DebugLanguageOutputTests Serde)
target_link_libraries(StringLiteralDecodeTests Serde)

# === tree-sitter (in-process grammars for ~21 programming languages) ===
add_subdirectory(extern/tree-sitter)
target_link_libraries(${PROJECT_NAME} tree-sitter-grammars)
target_link_libraries(ParserFixtureTests tree-sitter-grammars)
target_link_libraries(DebugLanguageOutputTests tree-sitter-grammars)
target_link_libraries(StringLiteralDecodeTests tree-sitter-grammars)

# --------------------------
# Build Configs
# --------------------------
Expand Down
Loading