Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions .docker-compose/docker-compose.clickhouse.lb.dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Development stack using the ClickHouse Spring profile.
#
# This file lives in .docker-compose/, and Compose resolves relative host
# paths (env_file, build context, bind mounts) from the directory of the
# compose file. Everything located in the repository root is therefore
# referenced with a leading `../`.
version: '3'

services:

  fits:
    build:
      context: ../fits
      # `dockerfile` is resolved relative to the build context.
      dockerfile: Dockerfile
    container_name: fits
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    ports:
      - '8081:8080'

  # No container_name here, so the service can be scaled via deploy.replicas.
  rest:
    build:
      context: ..
      # Relative to the context (repository root); `../Dockerfile` would
      # point outside of the build context.
      dockerfile: Dockerfile
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    environment:
      - SPRING_PROFILES_ACTIVE=clickhouse
    deploy:
      replicas: 1
    ports:
      - '8092:8080'
    depends_on:
      - fits
      - db-docker

  web:
    build:
      context: ../web
      dockerfile: Dockerfile
    container_name: web
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    ports:
      - '8080:3000'

  # NOTE(review): yandex/clickhouse-server is the legacy image name; newer
  # releases are published as clickhouse/clickhouse-server - consider migrating.
  db-docker:
    image: yandex/clickhouse-server
    container_name: db-docker
    networks:
      - web
    ports:
      - '8123:8123'
      - '9000:9000'
      - '9004:9004'

  # One-shot schema initialiser: waits until db-docker answers queries, runs
  # initdb.sql against it, then idles so the container keeps running.
  db-docker-init:
    image: yandex/clickhouse-server
    container_name: db-docker-init
    volumes:
      # NOTE(review): assumes config/ sits in the repository root next to
      # .env (one level above this file) - confirm.
      - ../config/clickhouse:/var/clickhouse
    depends_on:
      - db-docker
    networks:
      - web
    entrypoint: [ '/bin/sh', '-c' ]
    command: |
      "
      while ! clickhouse-client --host db-docker -q \"SHOW databases;\"; do
          echo waiting for clickhouse up
          sleep 1
      done

      clickhouse-client --host db-docker --queries-file /var/clickhouse/initdb.sql

      tail -f /dev/null
      "

  adminer:
    image: adminer
    container_name: adminer
    env_file: ../.env
    restart: unless-stopped
    networks:
      - web
    ports:
      - '8090:8080'

  nginx:
    image: nginx
    container_name: nginx
    env_file: ../.env
    volumes:
      # NOTE(review): assumes config/nginx/nginx.conf is in the repository
      # root (one level above this file) - confirm.
      - ../config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
    ports:
      - '8082:80'
    networks:
      - web
    depends_on:
      - rest

networks:
  web: {}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM maven:3.9.0 as builder
COPY . /app
WORKDIR /app
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -Dspring.profiles.active=h2

FROM openjdk:21-jdk-slim
WORKDIR /app
Expand Down
93 changes: 66 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,58 +1,97 @@
# FITSInn

Place where your FITS files feel good.

## Purpose

FITSInn is a tool to store and analyse technical metadata extracted by characterisation tools such as [FITS](https://projects.iq.harvard.edu/fits/).
![img.png](docs/img.png)
FITSInn is an easy-to-use tool for storing and analyzing technical metadata extracted by characterisation tools
like [FITS](https://projects.iq.harvard.edu/fits/).

![FITSInn Screenshot](docs/img.png)

The features include:
* Improved user experience through an intuitive UI.
* Running file characterisation using FITS on the uploaded files.
* The original files are not stored.
* Slice and dice:
* Filtering,
* Drill-down,
* Property value aggregations,
* Distribution visualisations,
* Sampling.
* Built-in metadata conflict resolution.
* REST API for workflow automation.
### Key Features:

- **Intuitive User Interface**: Enhanced user experience through a sleek and streamlined interface.
- **File Characterisation**: Analyze uploaded files using FITS without storing the original files.
- **Data Analysis Tools**:
- Advanced filtering,
- Drill-down capabilities,
- Property value aggregations,
- Distribution visualizations,
- Sampling options.
- **Conflict Management**: Resolve metadata conflicts effortlessly.
- **Automation Support**: Comes with a REST API to integrate into your workflows.

---

## Installation

### Deployment
### Deployment (Production)

The artifacts are released as Docker images. You can install FITSInn using Docker-compose:
To deploy FITSInn, use the Docker images provided and execute the following docker-compose command:

```
```bash
docker-compose -f docker-compose.yaml up --pull always
```

Installation of FITSInn to Docker Swarm or K8S is possible, but is not currently addressed.
> **Note**:
> - Deployment to Docker Swarm or Kubernetes (K8S) is possible but not covered in this guide.


### Upgrade (Production)

### Local build
To upgrade FITSInn, use the Docker images provided:

Building the Docker images from scratch and starting FITSInn is executed via:
```bash
docker-compose -f docker-compose.yaml down
docker-compose -f docker-compose.yaml up --pull always
```

> **Note**:
> - The first command will remove all previously created containers. This will cause deletion of all ingested collections.



### Local Development Build

To build the Docker images from scratch and start FITSInn locally:

```bash
docker-compose -f docker-compose.dev.yaml up --build
```

File uploading using bash:
```
---

### Uploading Files to FITSInn

#### Using Bash:

```bash
bash ./utils/fileupload.sh http://localhost:8082 ~/path/to/files collection_name
```

File uploading using Python (the pip package `requests` is necessary):
```
#### Using Python:

Ensure you have the `requests` library installed. Then run:

```bash
python ./utils/fileupload.py http://localhost:8082/multipleupload ~/path/to/files 100 3 collection_name
```

## Issues
- **URL**: `http://localhost:8082` is suitable for local deployments.
- **Path to Files**: Replace `~/path/to/files` with the actual directory path containing the files.
- **Collection Name**: Replace `collection_name` with a name for your collection.

---

## Reporting Issues

If you encounter any issues while using FITSInn, please report them on GitHub:

[Submit an Issue](https://github.com/datascience/fitsinn/issues)

If you have any issue regarding FITSInn, please use [https://github.com/datascience/fitsinn/issues](https://github.com/datascience/fitsinn/issues).
---

## License

FITSInn is released under MIT license. See the [LICENSE](LICENSE) for details.
FITSInn is released under the MIT license. For more details, see the [LICENSE](LICENSE) file.
40 changes: 38 additions & 2 deletions config/clickhouse/initdb.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,46 @@
CREATE TABLE characterisationresult
CREATE DATABASE IF NOT EXISTS current;

CREATE TABLE IF NOT EXISTS current.characterisationresult
(
file_path String,
property String,
source String,
property_value String,
value_type String
) ENGINE = ReplacingMergeTree
)
ENGINE = ReplacingMergeTree
PRIMARY KEY (source, property, file_path)
ORDER BY (source, property, file_path);

-- Stores partial aggregate states of property values, one key per
-- (property, file_path): a `uniq` state and an `any` state over String values.
CREATE TABLE IF NOT EXISTS current.agg_characterisationresult
(
    property      String,
    file_path     String,
    unique_values AggregateFunction(uniq, String),
    any_value     AggregateFunction(any, String)
)
ENGINE = AggregatingMergeTree
ORDER BY (property, file_path);

-- Feeds current.agg_characterisationresult: for rows read from
-- current.characterisationresult, writes the uniq/any aggregate states of
-- property_value grouped by (property, file_path).
CREATE MATERIALIZED VIEW IF NOT EXISTS current.mv_characterisationresult
    TO current.agg_characterisationresult
AS
SELECT
    property,
    file_path,
    uniqState(property_value) AS unique_values,
    anyState(property_value)  AS any_value
FROM current.characterisationresult
GROUP BY
    property,
    file_path;

-- One row per (property, file_path): the property value when exactly one
-- distinct value was recorded, otherwise the literal 'CONFLICT'.
--
-- The aggregate states are combined with the -Merge combinators under a
-- GROUP BY instead of calling finalizeAggregation() on each stored row:
-- an AggregatingMergeTree table may hold several not-yet-merged rows for the
-- same key, and finalising each of them separately would report every partial
-- state as conflict-free and return duplicate rows for that key.
CREATE VIEW IF NOT EXISTS current.characterisationresultaggregated
AS
SELECT
    property,
    file_path,
    CASE
        WHEN uniqMerge(unique_values) = 1
            THEN anyMerge(any_value)
        ELSE 'CONFLICT'
    END AS property_value
FROM current.agg_characterisationresult
GROUP BY
    property,
    file_path;
8 changes: 8 additions & 0 deletions core/src/main/java/rocks/artur/api/RemoveDataset.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package rocks.artur.api;

/**
 * This interface enables removing a dataset by its name.
 */
public interface RemoveDataset {
    /**
     * Removes the dataset with the given name.
     *
     * @param datasetName name of the dataset to remove
     * @return NOTE(review): the meaning of the returned Boolean is not defined here
     *         (presumably whether the removal succeeded) - confirm against the implementations
     */
    Boolean removeDataset(String datasetName);
}
18 changes: 18 additions & 0 deletions core/src/main/java/rocks/artur/api_impl/RemoveDatasetImpl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package rocks.artur.api_impl;

import rocks.artur.api.RemoveDataset;
import rocks.artur.domain.CharacterisationResultGateway;

/**
 * {@link RemoveDataset} implementation that forwards the request to a
 * {@link CharacterisationResultGateway}.
 */
public class RemoveDatasetImpl implements RemoveDataset {

    // Assigned once in the constructor and never replaced, hence final.
    private final CharacterisationResultGateway characterisationResultGateway;

    /**
     * @param characterisationResultGateway gateway that performs the actual removal
     */
    public RemoveDatasetImpl(CharacterisationResultGateway characterisationResultGateway) {
        this.characterisationResultGateway = characterisationResultGateway;
    }

    /**
     * Delegates to {@link CharacterisationResultGateway#removeDataset(String)}
     * and returns its result unchanged.
     *
     * @param datasetName name of the dataset to remove
     * @return whatever the gateway returns for this dataset name
     */
    @Override
    public Boolean removeDataset(String datasetName) {
        return characterisationResultGateway.removeDataset(datasetName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,6 @@ public interface CharacterisationResultGateway {
void resolveConflictsNative(String datasetName);

List<String> listDatasets();

Boolean removeDataset(String datasetName);
}
28 changes: 2 additions & 26 deletions docker-compose.clickhouse.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ services:
- web
restart: unless-stopped
environment:
- DB_SELECTOR=clickhouse
- SPRING_PROFILES_ACTIVE=clickhouse
deploy:
replicas: 1
ports:
- 8092:8080
- 8082:8080
depends_on:
- fits
- db-docker
Expand Down Expand Up @@ -77,29 +77,5 @@ services:
tail -f /dev/null
"


adminer:
image: adminer
container_name: adminer
env_file: .env
restart: unless-stopped
networks:
- web
ports:
- 8090:8080

nginx:
image: nginx
container_name: nginx
env_file: .env
volumes:
- ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
ports:
- 8082:80
networks:
- web
depends_on:
- rest

networks:
web:
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
- web
restart: unless-stopped
environment:
- DB_SELECTOR=clickhouse
- SPRING_PROFILES_ACTIVE=clickhouse
deploy:
replicas: 1
ports:
Expand Down
Loading
Loading