Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
49967e6
Changed fields name from Count.java so it will automatically extract …
mavroudo Mar 2, 2025
76ba4b9
Extended the SparkConf object to include 3 jars. These jars allow Spa…
mavroudo Mar 2, 2025
f5f5718
Modified functions getEventNames, getEventPairs, getCounts, getCountF…
mavroudo Mar 2, 2025
bb654a3
Was able to get a simple detection query to work on cluster. Pending …
mavroudo Mar 2, 2025
6ea7d55
Can support from/till queries in the detection
mavroudo Mar 3, 2025
5d2e9b5
Can support group of traces now
mavroudo Mar 3, 2025
d3d09eb
Removing unused methods and adding some commenting
mavroudo Mar 4, 2025
dab59e9
Restructure the code in the SparkDatabaseRepository.java and S3Connec…
mavroudo Mar 4, 2025
28eaf26
Was able to extract the complete set of constraints for positions, ex…
mavroudo Mar 4, 2025
127055a
Was able to extract the complete set of constraints for positions, ex…
mavroudo Mar 4, 2025
cf8b764
Simple order relation extraction works.
mavroudo Mar 4, 2025
a39a8f8
All ordered relations (along with the QueryPlanDeclareAll.java) are w…
mavroudo Mar 5, 2025
75b91bb
All state extractions are executed and output is coming out
mavroudo Mar 5, 2025
df6f4f6
Remove references to RDD
mavroudo Mar 5, 2025
3a29132
Some minor fixes including DeclareController, the Dockerfile and a te…
mavroudo Mar 5, 2025
bb23364
merged with current version of main
mavroudo Mar 12, 2025
28ae0b6
Fixed so that delta can work together with parquet. Also made sure th…
mavroudo Mar 12, 2025
223c210
Exploration query fix - Sorting IMR events
antliarokapis Mar 17, 2025
f322902
Fixed the issue where single detection was not working correctly
mavroudo Mar 20, 2025
1decc52
Fixed issue with matches that contained null events
mavroudo Mar 20, 2025
95ec7f4
Merge branch 'exploration' into cluster
mavroudo Mar 20, 2025
91de196
Fixed issue with matches that contained null events
mavroudo Mar 20, 2025
3293330
Fixed issue with Kleene* when working in PatternDetectionSingle. The …
mavroudo Apr 3, 2025
1dbe9e1
Merge pull request #11 from siesta-tool/bugfix_detection
mavroudo Apr 3, 2025
d3bf47c
Fixed issue that exploration was not working correctly for a single e…
mavroudo Apr 4, 2025
b0550cd
Added CassandraConnector
balaktsis Oct 13, 2025
4800750
Exploit index's extra info in cassandra
balaktsis Oct 13, 2025
eac2cad
Adjusting output
balaktsis Oct 14, 2025
82f7ef5
Fixed major parsing bug for index table
balaktsis Oct 31, 2025
bd8759a
Fixed compose file
balaktsis Oct 31, 2025
6f46148
Enabled both clustered and local execution
balaktsis Nov 12, 2025
c9dd98f
Simplified configurations
balaktsis Nov 12, 2025
3bb57a8
Added configurations
balaktsis Nov 12, 2025
db78dbf
Removed static IP
balaktsis Nov 12, 2025
12ba53f
Fixed master host bug
balaktsis Nov 12, 2025
e9b179b
Modified stuff
balaktsis Nov 14, 2025
b01dc27
Add files via upload
balaktsis Nov 26, 2025
26e41cb
Delete docker-compose-master.yml
balaktsis Nov 26, 2025
5ca65f0
Delete docker-compose-worker.yml
balaktsis Nov 26, 2025
2181ee6
Create docker-compose-swarm.yml
balaktsis Nov 26, 2025
8faf38f
Add Scylla service to docker-compose
balaktsis Nov 26, 2025
b37106c
Update S3 endpoint in docker-compose.yml
balaktsis Nov 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
target/classes/*
target/test-classes/*
target/*
src/main/resources/lib/*.jar
src/main/resources/jars/*.jar
siesta-query-processor.iml
experiments/*

Expand Down
11 changes: 9 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ FROM ubuntu:20.04

#ENV JAVA_HOME="/usr/lib/jvm/default-jvm/"

RUN apt-get update && apt-get install -y openjdk-17-jdk maven && \
RUN apt-get update && apt-get install -y openjdk-17-jdk maven wget && \
echo "export JAVA_HOME=$(dirname $(dirname $(readlink -f $(which java))))" >> /etc/profile.d/java.sh
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
ENV PATH=$PATH:${JAVA_HOME}/bin

ENV JARS_DIR=/code/src/main/resources/jars


# Install maven
Expand All @@ -20,8 +20,15 @@ RUN mvn dependency:resolve

# Adding source, compile and package into a fat jar
ADD src /code/src
RUN test -f ${JARS_DIR}/hadoop-aws-3.3.4.jar || wget -P ${JARS_DIR} https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar
RUN test -f ${JARS_DIR}/aws-java-sdk-bundle-1.12.262.jar || wget -P ${JARS_DIR} https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar
RUN test -f ${JARS_DIR}/hadoop-client-3.3.4.jar || wget -P ${JARS_DIR} https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client/3.3.4/hadoop-client-3.3.4.jar
RUN test -f ${JARS_DIR}/delta-spark_2.12-3.3.0.jar || wget -P ${JARS_DIR} https://repo1.maven.org/maven2/io/delta/delta-spark_2.12/3.3.0/delta-spark_2.12-3.3.0.jar
RUN test -f ${JARS_DIR}/delta-storage-3.3.0.jar || wget -P ${JARS_DIR} https://repo1.maven.org/maven2/io/delta/delta-storage/3.3.0/delta-storage-3.3.0.jar
RUN mvn clean compile package -f pom.xml -DskipTests

# Making sure jars are where they should be


CMD ["java", "--add-exports", "java.base/sun.nio.ch=ALL-UNNAMED" , "-jar", "target/siesta-query-processor-3.0.jar"]
#ENTRYPOINT ["tail", "-f", "/dev/null"]
50 changes: 25 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,37 +41,37 @@ To run it locally specify the properties in the application.properties file in t
and then run the project after adding in the ``Add VM options`` under ``Run/Edit configurations`` the following line \
``--add-opens=java.base/sun.nio.ch=ALL-UNNAMED ``

### Running in docker
To run it locally, open a terminal inside the SequenceDetectionQueryExecutor directory and run:
```bash
docker-compose build
docker-compose up -d
```
### Running with Docker

#### Single Machine Deployment (Development/Testing)

To run everything on one machine:

Ensure that this Docker container and the database can communicate: either run the database
on a public IP, or connect the two containers to the same network. You need to set the
following environment variables (the defaults shown here work for a local setup) in the
docker-compose file before running the QueryExecutor.

#### Distributed Cluster Deployment (Production)

For distributed processing with workers on different machines, see:
- **[QUICKSTART.md](QUICKSTART.md)** - Quick setup guide
- **[CLUSTER_DEPLOYMENT.md](CLUSTER_DEPLOYMENT.md)** - Comprehensive deployment documentation

Quick setup:

**On Master Machine:**
```bash
./setup-master.sh
```
master.uri: local[*]
database: s3
delta: false # True for streaming, False for batching
#for s3 (minio)
s3.endpoint: http://minio:9000
s3.user: minioadmin
s3.key: minioadmin
s3.timeout: 600000
server.port: 8090
```
### SIESTA Query type list
Below there is a list of all the possible SIESTA queries along with an example JSON or an example url assuming the
Query Processor is running on localhost:8090
* GET /health/check (Checks if the application is up and running)
* GET /lognames (Returns the names of the different log databases)
* POST /eventTypes (Returns the names of the different event types for a specific log database) \
Example JSON:

**On Worker Machines:**
```bash
./setup-worker.sh
```
{
"log_name" : "test"

This will deploy a true distributed Spark cluster with workers running on separate physical machines for better scalability and performance.

}
```
* GET /refreshData (Reloads metadata, this should run after a new log file is appended)
Expand Down
154 changes: 154 additions & 0 deletions docker-compose-swarm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
version: '3'

networks:
  # Overlay network shared by every service in the stack.
  siesta-swarm-net:

volumes:
  maven-cache:
  minio-storage:
  # Fix: scylla's data volume was mounted but never declared at the top level,
  # which makes `docker stack deploy` reject the file.
  scylla-data:

services:
  minio:
    image: minio/minio:latest
    # NOTE(review): container_name is ignored in swarm mode (deploy:); kept for
    # compatibility with plain `docker compose up`.
    container_name: minio
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    command: server /data
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      # Fix: was `minio_storage` (underscore), which does not match the declared
      # `minio-storage` volume above — an undeclared-volume error on deploy.
      - minio-storage:/data
    networks:
      - siesta-swarm-net
    deploy:
      replicas: 1
      placement:
        constraints:
          - 'node.labels.host == m1'

  scylla:
    # NOTE(review): container_name is ignored in swarm mode; see minio above.
    container_name: scylla
    image: scylladb/scylla:latest
    ports:
      - "9042:9042"
    command: --smp 8 --memory 32G --reserve-memory 2G --overprovisioned 1 --api-address 0.0.0.0
    volumes:
      # Fix: referenced undeclared `scylla_data`; now matches the declared
      # `scylla-data` volume.
      - scylla-data:/var/lib/scylla
    networks:
      # Fix: was `siesta-net`, which is not declared in this file — the only
      # network defined is `siesta-swarm-net`, so deploy would fail.
      - siesta-swarm-net
    healthcheck:
      test: ["CMD-SHELL", "nodetool status"]
      interval: 15s
      timeout: 15s
      retries: 5
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        max_attempts: 3
      placement:
        constraints:
          - 'node.labels.host == m1'

  # SIESTA query processor; submits jobs to the Spark master below.
  query:
    build: .
    image: siesta-query:latest
    stdin_open: true
    networks:
      - siesta-swarm-net
    ports:
      - '8090:8090'
    environment:
      master.uri: spark://spark:7077
      server.port: 8090
      database: s3  # cassandra or s3
      delta: "false"  # "true" for streaming, "false" for batching
      spring.mvc.pathmatch.matching-strategy: ANT_PATH_MATCHER
      # for s3 (minio)
      s3.endpoint: http://minio:9000
      s3.user: minioadmin
      s3.key: minioadmin
      s3.timeout: 600000
      # Scylla/Cassandra configuration
      cassandra.contact.points: scylla
      cassandra.port: 9042
      cassandra.keyspace: siesta
    volumes:
      - maven-cache:/root/.m2
    deploy:
      replicas: 1
      placement:
        constraints:
          - 'node.labels.host == m1'

  # Spark master; workers connect via spark://spark:7077.
  spark:
    image: spark-base:3.5.4
    networks:
      - siesta-swarm-net
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_MEMORY=16G
      - SPARK_MASTER_CORES=4
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_USER=spark
      - LOGS_PATH=/tmp/spark-events
      - RESULTS_PATH=/tmp/output/
      - SPARK_MASTER_WEBUI_PORT=8085
    ports:
      - '8085:8085'
      - '4040:4040'
      - '8081:8081'
    deploy:
      replicas: 1
      placement:
        constraints:
          - 'node.labels.host == m1'

  spark-worker-1:
    image: spark-base:3.5.4
    networks:
      - siesta-swarm-net
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=8G
      - SPARK_WORKER_CORES=4
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_USER=spark
      #- LOGS_PATH=/tmp/spark-events
      #- RESULTS_PATH=/tmp/output/
    deploy:
      replicas: 1
      placement:
        constraints:
          - 'node.labels.host == m2'

  spark-worker-anaconda-2:
    image: spark-base:3.5.4
    networks:
      - siesta-swarm-net
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=8G
      - SPARK_WORKER_CORES=4
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_USER=spark
      #- LOGS_PATH=/tmp/spark-events
      #- RESULTS_PATH=/tmp/output/
    deploy:
      replicas: 1
      placement:
        constraints:
          - 'node.labels.host == m2'
Loading