Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions com_009_material_flow_databases/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.env
*#
*.py[co]
*.DS_Store
data/*
credentials.json
4 changes: 4 additions & 0 deletions com_009_material_flow_databases/.sampleenv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mfa_db_password=<<password>>

CARTO_WRI_RW_USER=<<user>>
CARTO_WRI_RW_KEY=<<password>>
32 changes: 32 additions & 0 deletions com_009_material_flow_databases/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
FROM continuumio/miniconda3
# MAINTAINER is deprecated in modern Docker; use a label instead.
LABEL maintainer="Nathan Suberi <nathan.suberi@wri.org>"

# Provide name of container
ARG NAME

# Install necessary libraries; clean the apt cache in the same layer
# so it does not bloat the image.
RUN apt-get update -y \
    && apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc libpq-dev \
    && rm -rf /var/lib/apt/lists/*
RUN conda update -n base conda && conda install pyodbc pandas
# tqdm is pinned to 4.20.0 to work around a cartoframes upload issue
# (see https://github.com/tqdm/tqdm/issues/481, referenced in src/__init__.py)
RUN pip install cartoframes && pip uninstall -y tqdm && pip install tqdm==4.20.0

# Configure postgresql drivers: build psqlODBC from source against unixODBC,
# then remove the sources so they are not left in the final image.
RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz \
    && tar xzf psqlodbc-09.02.0100.tar.gz \
    && cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install \
    && cd .. && rm -rf psqlodbc-09.02.0100 psqlodbc-09.02.0100.tar.gz

# Copy the application folder inside the container
RUN mkdir -p /opt/$NAME/data
VOLUME /opt/$NAME/data
WORKDIR /opt/$NAME/
COPY contents/ .

# Set up ODBC driver info (odbcinst.ini ships with the app contents)
RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini

# Restrict permissions
RUN useradd -r $NAME
RUN chown -R $NAME:$NAME .
#USER $NAME

CMD ["python", "main.py"]
41 changes: 41 additions & 0 deletions com_009_material_flow_databases/Dockerfile_without_conda
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM python:3.6
# MAINTAINER is deprecated in modern Docker; use a label instead.
LABEL maintainer="Nathan Suberi <nathan.suberi@wri.org>"

# Install core libraries for ODBC connection. update + install run in one
# layer (avoids installing from a stale package index) and the apt cache
# is removed so it does not bloat the image.
RUN apt-get update -y \
    && apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc \
    && rm -rf /var/lib/apt/lists/*

# https://github.com/mkleehammer/pyodbc
RUN pip install --upgrade pip && pip install pyodbc

## Some attempts at manually installing the drivers fail
## https://blog.csdn.net/jollypigclub/article/details/46490541
## https://www.cnblogs.com/he11o-liu/p/7503232.html
## https://odbc.postgresql.org/docs/unix-compilation.html

# Build psqlODBC from source against unixODBC, then drop the sources.
RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz \
    && tar xzf psqlodbc-09.02.0100.tar.gz \
    && cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install \
    && cd .. && rm -rf psqlodbc-09.02.0100 psqlodbc-09.02.0100.tar.gz

# set name
ARG NAME=nrt-script
ENV NAME ${NAME}

# copy the application folder inside the container
RUN mkdir -p /opt/$NAME/data
WORKDIR /opt/$NAME/
COPY contents/ .

# Set up ODBC driver info; the cat/odbcinst calls are build-time sanity checks
RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini
RUN cat /etc/odbcinst.ini
RUN odbcinst -j

RUN useradd -r $NAME
RUN chown -R $NAME:$NAME /opt/$NAME
VOLUME /opt/$NAME/data
#USER $NAME

CMD ["python", "main.py"]
22 changes: 22 additions & 0 deletions com_009_material_flow_databases/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# com_009_material_flow_databases

Script for pulling material flow accounting (MFA) tables from the source database over ODBC and uploading them to Carto.

# Run

Copy `.sampleenv` to `.env` and enter account credentials. Copy the service account credential file to `credentials.json` if needed.

`./start.sh` Build docker and run once.

# Modify

`start.sh` Edit script name / Docker image name.

`contents/` Copied into container.

`contents/src/__init__.py` Main application script.

`contents/src/eeUtil/` Utility module for interacting with GEE.

`time.cron` Edit cron frequency.

4 changes: 4 additions & 0 deletions com_009_material_flow_databases/contents/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python3
"""Container entry point: run the application defined in the src package."""
if __name__ == '__main__':
    # Import inside the guard so merely importing this module has no side effects.
    import src

    src.main()
7 changes: 7 additions & 0 deletions com_009_material_flow_databases/contents/odbcinst.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[PostgreSQL Unicode]
Description = PostgreSQL ODBC driver (Unicode version)
Driver = /usr/local/lib/psqlodbcw.so
Setup = libodbcpsqlS.so
Debug = 0
CommLog = 1
UsageCount = 2
114 changes: 114 additions & 0 deletions com_009_material_flow_databases/contents/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Import libraries
import os
import sys
import logging
from datetime import datetime

import pyodbc
import pandas as pd
import cartoframes

LOG_LEVEL = logging.INFO

# ODBC Connection details -- these can be pulled out into an odbc.ini file
ODBC_SOURCE_URL = 'vps348928.ovh.net'
ODBC_PORT = '5432'
ODBC_DATABASE = 'mfa'
ODBC_USER = 'mfa'
ODBC_PASSWORD = os.environ.get('mfa_db_password')

# The DRIVER value must match the section header in /etc/odbcinst.ini
# ("[PostgreSQL Unicode]"), installed by the Dockerfile.
CONNECTION_STRING = 'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}'
cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}', ODBC_SOURCE_URL, ODBC_PORT, ODBC_DATABASE, ODBC_USER, ODBC_PASSWORD)

# Carto Connection details.
# Env-var names were not homogenized across environments: the server uses
# CARTO_WRI_RW_USER / CARTO_WRI_RW_KEY while .sampleenv declares
# carto_user / carto_password. Accept either, preferring the server names.
CARTO_USER = os.environ.get('CARTO_WRI_RW_USER') or os.environ.get('carto_user')
CARTO_PASSWORD = os.environ.get('CARTO_WRI_RW_KEY') or os.environ.get('carto_password')

# Flow control: True = query the source DB; False = load cached CSVs from the volume
DOWNLOAD = True
# IN CASE RUN INTO TQDM PROBLEMS, refer to: https://github.com/tqdm/tqdm/issues/481

def main():
    """Fetch MFA tables over ODBC and upload the FlowMFA table to Carto.

    When DOWNLOAD is True, queries the Country and FlowMFA tables from the
    source database and caches them as CSVs under data/; otherwise, loads
    previously cached CSVs from the docker volume. The FlowMFA table is
    then written to the com_009_flowmfa_autoupdate Carto table.

    Raises:
        RuntimeError: if no FlowMFA data could be obtained from either path.
    """
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ###
    # Initialize pyodbc
    ###

    # SECURITY: do not log cnxnstr itself -- it embeds the DB password.
    logging.info('Connecting to {}:{} database {} as {}'.format(
        ODBC_SOURCE_URL, ODBC_PORT, ODBC_DATABASE, ODBC_USER))
    cnxn = pyodbc.connect(cnxnstr, autocommit=True)
    cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
    cnxn.setencoding(encoding='utf-8')
    cursor = cnxn.cursor()

    # Sentinel so the upload step can detect that neither branch produced data
    # (previously a missing cache file caused a NameError at cc.write).
    flowmfa = None

    # For debugging purposes - there are sometimes when the tqdm package throws an error
    # This flow control allows for testing the upload process specifically
    if DOWNLOAD:

        ###
        # Fetch data
        ###

        logging.info("DEMO - run query for countries table to prove this works")

        before = datetime.now()
        countries = pd.DataFrame.from_records(cursor.execute('SELECT * FROM Country').fetchall())
        logging.info('Shape of df is: {}'.format(countries.shape))
        after = datetime.now()
        logging.info("Countries query takes {}".format(after - before))
        countries.to_csv('data/countries.csv')

        logging.info("PROCESS THE meat and POTATOES - can take some time depending on internet connection speed")

        before = datetime.now()
        logging.info("Start time for FlowMFA: {}".format(before))
        flowmfa = pd.DataFrame.from_records(cursor.execute('SELECT * FROM FlowMFA').fetchall())
        logging.info('Shape of df is: {}'.format(flowmfa.shape))
        after = datetime.now()
        logging.info("FlowMFA query takes {}".format(after - before))

        # from_records yields positional columns; name them, drop the DB index,
        # and force 'amount' to float for Carto.
        flowmfa.columns = ['index', 'isoalpha3', 'flow', 'mfa13', 'mfa4', 'year', 'amount']
        flowmfa.drop('index', axis=1, inplace=True)
        flowmfa['amount'] = flowmfa['amount'].astype(float)
        flowmfa.to_csv('data/flowmfa.csv')

        # FlowDetailed export disabled until its column names are confirmed:
        # before = datetime.now()
        # flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall())
        # logging.info('Shape of df is: {}'.format(flowdetailed.shape))
        # after = datetime.now()
        # logging.info("FlowDetailed query takes {}".format(after-before))
        # flowdetailed.columns = [???]
        # flowdetailed.to_csv('data/flowdetailed.csv')

    else:

        logging.info('Attempting to load tables from docker volume')

        # A missing cache file is an expected, recoverable condition;
        # catch only FileNotFoundError so real errors still surface.
        try:
            flowmfa = pd.read_csv('data/flowmfa.csv')
        except FileNotFoundError:
            logging.warning('flowmfa table not already available')

        try:
            flowdetailed = pd.read_csv('data/flowdetailed.csv')
        except FileNotFoundError:
            logging.warning('flowdetailed table not already available')

    ###
    # Authenticate to carto and upload data
    ###

    cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER),
                                  api_key=CARTO_PASSWORD)

    ###
    # Upload data
    ###

    if flowmfa is None:
        # Fail loudly rather than with a NameError inside cc.write.
        raise RuntimeError('flowmfa table unavailable: nothing to upload')

    cc.write(flowmfa, 'com_009_flowmfa_autoupdate', overwrite=True)
    # cc.write(flowdetailed, 'com_009_flowdetailed', overwrite=True)

    logging.info('SUCCESS')
4 changes: 4 additions & 0 deletions com_009_material_flow_databases/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python3
"""Top-level launcher: hand control to src.main() when run as a script."""
if __name__ == '__main__':
    import src
    src.main()
17 changes: 17 additions & 0 deletions com_009_material_flow_databases/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
# Build the Docker image and run the script once. Stop on the first failure
# so a broken build never launches a stale container.
set -e

# Change the NAME variable with the name of your script.
# Quoted substitutions keep paths with spaces from word-splitting.
NAME=$(basename "$(pwd)")
LOG=${LOG:-udp://localhost}

docker build -t "$NAME" --build-arg NAME="$NAME" .
docker run -it \
    --log-driver=syslog \
    --log-opt syslog-address="$LOG" \
    --log-opt tag="$NAME" \
    --env-file .env \
    --rm "$NAME" \
    python main.py

#/bin/bash
#-v data:/opt/$NAME/data \
1 change: 1 addition & 0 deletions com_009_material_flow_databases/time.cron
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 0 * * *