Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CSS_DIR="styles/main.css"
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.csv filter=lfs diff=lfs merge=lfs -text
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
myenv
venv
.ipynb_checkpoints
/BatchProcess/DataSource/YahooFinance/.ipynb_checkpoints
__pycache__
build
dist
4 changes: 4 additions & 0 deletions .sonarlint/connectedMode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"sonarCloudOrganization": "nolanmm",
"projectKey": "NolanMM_DeepLearning_Quant_Trading_Group_Project"
}
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"sonarlint.connectedMode.project": {
"connectionId": "nolanmm",
"projectKey": "NolanMM_DeepLearning_Quant_Trading_Group_Project"
}
}
209 changes: 209 additions & 0 deletions 1_HomePage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
from BatchProcess.DataSource.ListSnP500.ListSnP500Collect import ListSAndP500
from BatchProcess.BatchProcess import BatchProcessManager
from multiprocessing.pool import ThreadPool
import plotly.graph_objects as go
from dotenv import load_dotenv
from pathlib import Path
import streamlit as st
import pandas as pd
import time
import os

# Worker pool for the long-running batch ingest (section III below).
pool = ThreadPool(processes=6)
load_dotenv(override=True)

# Resolve the stylesheet relative to this file so the app works from any CWD.
current_dir = Path(__file__).parent if "__file__" in locals() else Path.cwd()
# Fall back to the conventional stylesheet path when CSS_DIR is unset, so a
# missing .env does not crash with `Path / None` (TypeError).
css_file = current_dir / os.getenv("CSS_DIR", "styles/main.css")
defaut_start_date = "2014-01-01"  # NOTE(review): unused in this module — confirm before removing

st.set_page_config(page_title="Home Page", page_icon=":house:",
                   initial_sidebar_state="collapsed")
st.sidebar.header("Quantitative Trading Project")
st.title("Welcome to the Home Page")
# Inline CSS tweaks keyed to Streamlit's generated emotion-cache class names.
# NOTE(review): these selectors are fragile across Streamlit upgrades.
st.markdown(
    """
    <style>
    .st-emotion-cache-ocqkz7.e1f1d6gn5{
    text-align: center;
    }

    h1{
    text-align: center;
    }

    .st-emotion-cache-13ln4jf.ea3mdgi5 {
    max-width: 1200px;
    }
    </style>
    """, unsafe_allow_html=True)

# --- LOAD CSS ---
# Explicit encoding: stylesheet is project-authored text, read it as UTF-8.
with open(css_file, encoding="utf-8") as f:
    st.markdown("<style>{}</style>".format(f.read()), unsafe_allow_html=True)


# --- CACHE DATA ---
@st.cache_data(ttl=1800)
def retrieve_list_ticket():
    """Return the S&P 500 ticker list, preferring the database copy.

    Falls back to scraping the live list when the database copy is missing
    or incomplete (fewer than 497 symbols). Cached for 30 minutes.
    """
    symbols = BatchProcessManager().get_stock_list_in_database()
    if symbols is None or len(symbols) < 497:
        symbols = ListSAndP500().tickers_list
    return symbols


@st.cache_data(ttl=1800)
def batch_process(list_of_symbols__):
    """Run the full batch ingest for *list_of_symbols__* (cached 30 min)."""
    manager = BatchProcessManager()
    return manager.run_process(list_of_symbols__)


@st.cache_data(ttl=1800)
def batch_process_retrieve_data_by_stock(the_stock_in):
    """Fetch one ticker's stored rows from the database (cached 30 min)."""
    manager = BatchProcessManager()
    return manager.get_stock_data_by_ticker(the_stock_in)


@st.cache_data
def convert_df_to_csv(df):
    """Serialize *df* to CSV text and return it as UTF-8 bytes for download."""
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")


@st.cache_data(ttl=1800)
def batch_process_retrieve_all_data_in_stock_table():
    """Fetch every ticker's rows as one combined DataFrame (cached 30 min)."""
    manager = BatchProcessManager()
    return manager.get_all_stock_data_in_database()


# Assumed wall-clock budget for a full batch run; drives the cosmetic
# progress bar in section III — presumably tuned by hand, TODO confirm.
PROCESS_TIME = 180  # seconds
_list_of_symbols = retrieve_list_ticket()


# --- MAIN PAGE ---
# Remember the selected ticker across Streamlit reruns so the tabs in
# section II can render data for it.
if "stock_data" not in st.session_state:
    st.session_state.stock_data = None
st.markdown('---')
st.markdown("### I. Retrieve stock data symbol list")

the_stock = st.selectbox(
    "Select the stock you want to retrieve from database (if available)", _list_of_symbols)

retrieve_col1, retrieve_col2, retrieve_col3 = st.columns(3)
with retrieve_col1:
    btn_prepare = st.button("Retrieve stock data from database...")

# Download data by ticket button
with retrieve_col2:
    btn_retrieve_data_by_ticket = st.button(
        "Process File for Ticket Data in Database (csv)")

    if btn_retrieve_data_by_ticket:
        st.session_state.stock_data = the_stock
        df = batch_process_retrieve_data_by_stock(the_stock)
        if df is not None:
            df = pd.DataFrame(df)
            csv = convert_df_to_csv(df)
            st.download_button(
                label="Download Ticket as CSV",
                data=csv,
                file_name=f"Ticket_{the_stock}_data.csv",
                mime="text/csv",
            )
        else:
            st.error(
                "No data found for this stock, please update the database first.")

# Download all data in database button
with retrieve_col3:
    btn_retrieve_all_data = st.button("Download All Data in Database(csv)")
    if btn_retrieve_all_data:
        st.session_state.stock_data = the_stock
        df = batch_process_retrieve_all_data_in_stock_table()
        if df is not None:
            csv = convert_df_to_csv(df)
            st.download_button(
                label="Download All Data as CSV",
                data=csv,
                file_name="All_Stock_data.csv",
                mime="text/csv",
            )
        else:
            # Fixed wording (was: "No data found for in database, ...").
            st.error(
                "No data found in database, please update the database first.")

if btn_prepare:
    st.session_state.stock_data = the_stock

st.markdown('---')
# --- TABS ---
st.markdown(
    "### II. List of 500 S&P, Historical data, In Day Data, Top News, Reddit News")
# Only the first two tabs have content in this file; the other three are
# presumably filled by the pages/ modules — TODO confirm.
List500, Historical_data, IndayData_RealTime, news, reddit_news = st.tabs(
    ["List 500 S&P", "Historical data", "In Day Data", "Top News", "Reddit News"])

# --- TABS LIST500 S&P CONTENT---
with List500:
    st.write("List of 500 S&P")
    st.write(_list_of_symbols)

# --- TABS HISTORICAL DATA CONTENT---
with Historical_data:
    # Render only after the user picked a stock in section I above.
    if st.session_state.stock_data is not None:
        df = batch_process_retrieve_data_by_stock(st.session_state.stock_data)
        if df is not None:
            df = pd.DataFrame(df)
            # assumes the table exposes date/open/high/low/close columns —
            # TODO confirm against the Stock table schema
            fig = go.Figure(data=[go.Candlestick(x=df['date'],
                                                 open=df['open'],
                                                 high=df['high'],
                                                 low=df['low'],
                                                 close=df['close'])])
            # Add a title
            fig.update_layout(
                title=f"{st.session_state.stock_data} Price Candlestick Chart",
                # Center the title
                title_x=0.3,

                # Customize the font and size of the title
                title_font=dict(size=24, family="Arial"),

                # Set the background color of the plot
                plot_bgcolor='white',

                # Customize the grid lines
                xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
                yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
            )

            # Add a range slider and customize it
            fig.update_layout(
                xaxis_rangeslider_visible=True,  # Show the range slider

                # Customize the range slider's appearance
                xaxis_rangeslider=dict(
                    thickness=0.1,  # Set the thickness of the slider
                    bordercolor='black',  # Set the border color
                    borderwidth=1,  # Set the border width
                )
            )

            # Display the chart in Streamlit
            st.plotly_chart(fig)
            st.markdown(
                f"#### Dataframe of {st.session_state.stock_data} Prices")
            st.write(df)
        else:
            st.write(
                "No data found for this stock, please update the database first.")
    else:
        st.write("Please select the stock to retrieve the data")

st.markdown('---')
# --- Set Up/ Update all data in database---
st.markdown("### III. Set Up data in database for the first time")
update_database = st.button("Update Database")
if update_database:
    # Run the (long) batch ingest on a worker thread so the UI stays live.
    async_result = pool.apply_async(
        batch_process, args=(_list_of_symbols,))
    # Time-based progress indicator: animates over PROCESS_TIME seconds and
    # is NOT tied to the job's real progress — purely cosmetic.
    bar = st.progress(0)
    per = PROCESS_TIME / 100
    for i in range(100):
        time.sleep(per)
        bar.progress(i + 1)
    # Block until the batch actually finishes (may exceed PROCESS_TIME).
    # The returned dict is unused here, but .get() also surfaces any
    # exception raised on the worker thread.  (Removed unused `df_dict`.)
    async_result.get()
    st.write("Please check the data in the database")
12 changes: 12 additions & 0 deletions BatchProcess/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
DATABASE_SERVER="localhost"
DATABASE_PORT="5432"
DATABASE_NAME="postgres"
DATABASE_USER="postgres"
DATABASE_PASSWORD="admin"
CREATE_SCHEMA_QUERY="CREATE SCHEMA IF NOT EXISTS tickets;"

INSERT_QUERY_REDDIT_TABLE="INSERT INTO reddits.stock_reddit_news (id, subreddit, url, title, score, num_comments, downvotes, ups, date_created_utc) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;"
POSTGRE_CONNECTION="dbname=postgres user=postgres host=localhost password=admin"
CONFIGURE_REDDIT_TABLE="CREATE INDEX IF NOT EXISTS idx_stock_reddit_news_id ON reddits.stock_reddit_news(id);"
CREATE_REDDIT_TABLE_QUERY="CREATE TABLE IF NOT EXISTS reddits.stock_reddit_news (id VARCHAR PRIMARY KEY, subreddit VARCHAR, url VARCHAR, title TEXT, score TEXT, num_comments TEXT, downvotes TEXT, ups TEXT, date_created_utc TEXT);"
CREATE_REDDIT_SCHEMA_QUERY="CREATE SCHEMA IF NOT EXISTS reddits;"
129 changes: 129 additions & 0 deletions BatchProcess/BatchProcess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from Database.PostGreSQLInteraction import DatabaseManager, StockDatabaseManager, TicketDimDatabaseManager, RedditNewsDatabaseManager
from BatchProcess.DataSource.YahooFinance.YahooFinances_Services import YahooFinance
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
import psycopg2
import os

# Earliest date of historical prices fetched from Yahoo Finance.
# (sic: "defaut" — renaming this module-level name could break importers.)
defaut_start_date = "2014-01-01"

# Upper bound of the fetch window, frozen once at module-import time.
date_to = datetime.now().strftime('%Y-%m-%d')

load_dotenv(override=True)

# PostgreSQL connection settings, read from BatchProcess/.env.
postgres_server = os.getenv("DATABASE_SERVER")
postgres_port = os.getenv("DATABASE_PORT")
postgres_dbname = os.getenv("DATABASE_NAME")
postgres_user = os.getenv("DATABASE_USER")
postgres_pass = os.getenv("DATABASE_PASSWORD")


class BatchProcessManager:
    """Orchestrates the batch pipeline: fetch Yahoo Finance history,
    (re)build the Postgres schema, and load/read stock data.

    NOTE(review): ``self.conn`` is opened in ``__init__`` but never used or
    closed by any method here (each method builds its own
    ``*DatabaseManager``) — looks like a leaked connection; confirm no
    caller relies on the attribute before removing it.
    """

    def __init__(self):
        self.list_of_symbols = None
        self.dict_ticket = dict()  # ticker -> DataFrame, filled by run_process
        self.dbname = postgres_dbname
        self.user = postgres_user
        self.password = postgres_pass
        self.host = postgres_server
        self.port = postgres_port
        self.conn = self.create_connection()

    def create_connection(self):
        """Open a psycopg2 connection from the module-level env settings.

        Returns:
            The connection, or ``None`` when the attempt fails (the error
            is printed, not raised).
        """
        try:
            return psycopg2.connect(
                dbname=self.dbname,
                user=self.user,
                password=self.password,
                host=self.host,
                port=self.port
            )
        except Exception as e:
            print(e)
            return None

    def run_process(self, list_of_symbols_):
        """Rebuild the database from scratch for *list_of_symbols_*.

        Downloads the full price history from Yahoo Finance, drops the
        existing schema, recreates all tables, and inserts the data per
        ticker.

        Returns:
            dict: ticker -> DataFrame of that ticker's rows.
        """
        self.list_of_symbols = list_of_symbols_

        # Get data from Yahoo Finance (one combined frame for all tickers).
        transformed_data = YahooFinance(
            self.list_of_symbols, defaut_start_date, date_to)
        df = transformed_data.process_data()

        # Create Database Manager
        db_manager = DatabaseManager()

        # Destructive: drop all tables existing in the database, then
        # recreate the three table groups.
        db_manager.delete_schema()
        db_manager.StockDatabaseManager.create_schema_and_tables(
            self.list_of_symbols)
        db_manager.TicketDimDatabaseManager.create_table()
        db_manager.RedditNewsDatabaseManager.create_schema_and_tables()

        # Split the combined frame into one frame per ticker.
        # TODO: apply multiprocessing to the insert phase (test later).
        for symbol in self.list_of_symbols:
            filtered_data = df[df['stock_id'] == symbol]
            self.dict_ticket[symbol] = filtered_data.reset_index()

        # Insert data into the database Stock table(s).
        for key, value in self.dict_ticket.items():
            if isinstance(value, pd.DataFrame):
                db_manager.StockDatabaseManager.insert_data(key, value)

        # Insert data into the database TicketDim table.
        db_manager.TicketDimDatabaseManager.insert_data(self.list_of_symbols)

        db_manager.close_connection()
        return self.dict_ticket

    def get_stock_data_by_ticker(self, ticker):
        """Fetch one ticker's table; return its rows, or ``None`` on error."""
        try:
            db_manager = StockDatabaseManager()
            data = db_manager.get_data_by_table(ticker)
            db_manager.close_connection()
            return data
        except Exception as e:
            print(e)
            return None

    def get_stock_list_in_database(self):
        """Return the ticker list from the dimension table, or ``None``."""
        try:
            db_manager = TicketDimDatabaseManager()
            data = db_manager.get_data()
            db_manager.close_connection()
            return data
        except Exception as e:
            print(e)
            return None

    def get_all_stock_data_in_database(self):
        """Concatenate every per-ticker table into one date-sorted frame.

        Returns:
            DataFrame sorted by 'date', or ``None`` when the database is
            empty or any step fails (pd.concat raises on an empty dict,
            which the except clause converts to ``None``).
        """
        try:
            db_manager = StockDatabaseManager()
            data = db_manager.fetch_all_data()
            db_manager.close_connection()
            # dict of DataFrames -> single frame; the keys are not needed.
            combined_dataframe = pd.concat(
                list(data.values()), ignore_index=True)
            combined_dataframe['date'] = pd.to_datetime(
                combined_dataframe['date'])

            # Sort the combined dataframe by the 'date' column
            return combined_dataframe.sort_values(by='date')
        except Exception as e:
            print(e)
            return None
18 changes: 18 additions & 0 deletions BatchProcess/DataSource/AlphaVantage/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Postgres Configuration
POSTGRES_VERSION="org.postgresql:postgresql:42.2.20"
POSTGRES_URL="jdbc:postgresql://localhost:5432/postgres"
POSTGRES_USER="postgres"
POSTGRES_PASSWORD="admin"
POSTGRES_TABLE="FactPrices"

FORMAT_FILE="jdbc"
MODE="append"

COLUMN_1="stock_id"
COLUMN_2="date"
COLUMN_3="open"
COLUMN_4="high"
COLUMN_5="low"
COLUMN_6="close"
COLUMN_7="volume"
COLUMN_8="adjusted_close"
Loading