Compare commits

...

10 Commits

Author SHA1 Message Date
socrates@bakunin
9ea1623c64 Made parser.py executable 2024-02-27 10:40:22 +01:00
f806eeb5fb streamlined logging a little bit more. 2023-12-13 01:36:11 +01:00
socrates
6ede613642 scripts now log to process 1 and therefore to docker logs 2023-12-13 00:54:44 +01:00
socrates
c19896351a added logging to stdout 2023-12-12 15:01:36 +01:00
socrates
fd4d56e667 changed a little bit about logging 2023-12-12 13:15:30 +01:00
socrates
5cc27ca6b9 changed docker for better persistence 2023-12-12 12:36:36 +01:00
socrates
db51753ff9 changed config to production 2023-12-12 11:57:51 +01:00
socrates
e954547b67 fixed dockerfile 2023-12-12 11:54:37 +01:00
socrates
d27a6f1210 added crontab 2023-12-11 13:53:04 +01:00
socrates
f242ef1681 first docker functionality 2023-12-11 12:53:21 +01:00
7 changed files with 88 additions and 25 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
files/ files/
logs/

31
Dockerfile Normal file
View File

@@ -0,0 +1,31 @@
# Image for the DW parser: Debian bookworm with cron, Python 3, openpyxl,
# Selenium and pyvirtualdisplay. /app/run.sh is the container entry point.
FROM debian:bookworm-slim

# Make Python write stdout/stderr unbuffered so log lines appear immediately.
ENV PYTHONUNBUFFERED=1

# Build-time only: keep package configuration (e.g. tzdata) from prompting.
ARG DEBIAN_FRONTEND=noninteractive

# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to be stable for scripted use. --with-new-pkgs keeps the upgrade
# semantics of "apt upgrade" (new dependencies may be installed).
RUN apt-get update \
 && apt-get -y --with-new-pkgs upgrade \
 && apt-get -y install \
        vim \
        cron \
        tzdata \
        python3-openpyxl \
        python3-selenium \
        python3-pyvirtualdisplay \
 && rm -rf /var/lib/apt/lists/*

# Set the container's local time zone to Europe/Berlin.
# ln -sf replaces /etc/localtime itself. Copying onto it would write through
# an already existing symlink and overwrite the zone file it points to.
RUN ln -sf /usr/share/zoneinfo/Europe/Berlin /etc/localtime

# Application files; /app/init holds the templates that run.sh copies into
# /app/conf when no configuration is present yet.
COPY /parser.py /app/
COPY /kill-chrome.sh /app/
COPY /run.sh /app/
COPY /init /app/init

# Working directories used at runtime.
RUN mkdir -p /app/files /app/log /app/conf

# The scripts are started directly (by CMD and by cron), so they must be executable.
RUN chmod +x /app/run.sh /app/kill-chrome.sh /app/parser.py

CMD [ "/app/run.sh" ]

View File

@@ -1,8 +1,8 @@
{ {
"oo_url": "https://cloud.diakonie-hamburg.de/index.php/apps/onlyoffice/s/EnjlEAC3pBX1gZn?fileId=5267", "oo_url": "https://cloud.diakonie-hamburg.de/index.php/apps/onlyoffice/s/EnjlEAC3pBX1gZn?fileId=5267",
"logfile": "/var/log/dw-parser/dw-parser.log", "logfile": "log/dw-parser.log",
"files": "/home/socrates/dw-parser/files/", "files": "files/",
"loglevel": "WARNING", "loglevel": "INFO",
"smtp_server": "smtp.gmail.com", "smtp_server": "smtp.gmail.com",
"smtp_port": "587", "smtp_port": "587",

3
init/crontab Normal file
View File

@@ -0,0 +1,3 @@
# m h dom mon dow cmd
# Every 5 minutes during hours 8-20, Monday to Friday: run the parser from /app
# and send its stdout and stderr to the stdout of PID 1.
# cron runs jobs with /bin/sh by default (dash on Debian), which does not
# support bash's "&>" shorthand: it would be read as "run in the background"
# followed by an empty redirection. The portable "> file 2>&1" form is used instead.
*/5 8-20 * * 1-5 cd /app/ && ./parser.py > /proc/1/fd/1 2>&1
# Daily at 00:00: run kill-chrome.sh (presumably cleans up leftover Chrome
# processes - confirm against the script).
0 0 * * * /app/kill-chrome.sh

0
kill-chrome → kill-chrome.sh Executable file → Normal file
View File

56
parser.py Executable file → Normal file
View File

@@ -14,7 +14,7 @@ import time
import sys import sys
def load_settings(): def load_settings():
with open('settings.json') as file: with open('conf/conf.json') as file:
return json.load(file) return json.load(file)
def fetch_via_browser(newfile): def fetch_via_browser(newfile):
@@ -27,9 +27,12 @@ def fetch_via_browser(newfile):
"download.default_directory" : set['files'], "download.default_directory" : set['files'],
"profile.default_content_settings.popups" : 0, "profile.default_content_settings.popups" : 0,
} }
logging.info(prefs) log.debug("CHROMIUM PREFS:")
log.debug(prefs)
options.add_experimental_option("prefs", prefs) options.add_experimental_option("prefs", prefs)
options.add_argument("--headless=new") options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
browser = webdriver.Chrome(options=options) browser = webdriver.Chrome(options=options)
wait = WebDriverWait(browser, 10) wait = WebDriverWait(browser, 10)
@@ -67,32 +70,32 @@ def fetch_via_browser(newfile):
timeout += 1 timeout += 1
if timeout == 20: if timeout == 20:
logging.warning("Timeout beim Dateidownload erreicht.") log.warning("Timeout beim Dateidownload erreicht.")
browser.quit() browser.quit()
disp.stop() disp.stop()
except: except:
logging.error("Exception raised: ",sys.exc_info()) log.error("Exception raised: ",sys.exc_info())
def rotate_file(oldfile, newfile): def rotate_file(oldfile, newfile):
try: try:
os.remove(oldfile) os.remove(oldfile)
except Exception as e: except Exception as e:
logging.error("Fehler beim Löschen") log.error("Fehler beim Löschen")
if hasattr(e, 'message'): if hasattr(e, 'message'):
logging.error(e.message) log.error(e.message)
else: else:
logging.error(e) log.error(e)
try: try:
os.rename(newfile, oldfile) os.rename(newfile, oldfile)
except Exception as e: except Exception as e:
logging.error("Fehler beim Umbenennen") log.error("Fehler beim Umbenennen")
if hasattr(e, 'message'): if hasattr(e, 'message'):
logging.error(e.message) log.error(e.message)
else: else:
logging.error(e) log.error(e)
def compare_files(oldfile, newfile): def compare_files(oldfile, newfile):
results = [] results = []
@@ -108,8 +111,8 @@ def compare_files(oldfile, newfile):
old_sheet.calculate_dimension(force=True) old_sheet.calculate_dimension(force=True)
new_sheet.calculate_dimension(force=True) new_sheet.calculate_dimension(force=True)
logging.debug(f'Old-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): {old_sheet.min_column}/{old_sheet.max_column}:{old_sheet.min_row}/{old_sheet.max_row}') log.debug(f'Old-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): {old_sheet.min_column}/{old_sheet.max_column}:{old_sheet.min_row}/{old_sheet.max_row}')
logging.debug(f'New-Sheet-Dimensions (mincol/maxcolminrow/maxrow): {new_sheet.min_column}/{new_sheet.max_column}:{new_sheet.min_row}/{new_sheet.max_row}') log.debug(f'New-Sheet-Dimensions (mincol/maxcolminrow/maxrow): {new_sheet.min_column}/{new_sheet.max_column}:{new_sheet.min_row}/{new_sheet.max_row}')
global_min_row = min(old_sheet.min_row, new_sheet.min_row) global_min_row = min(old_sheet.min_row, new_sheet.min_row)
global_max_row = max(set.get('max_row', old_sheet.max_row), set.get('max_row', new_sheet.max_row)) global_max_row = max(set.get('max_row', old_sheet.max_row), set.get('max_row', new_sheet.max_row))
@@ -121,7 +124,7 @@ def compare_files(oldfile, newfile):
new_cell = new_sheet.cell(row=row, column=col) new_cell = new_sheet.cell(row=row, column=col)
old_cell = old_sheet.cell(row=row, column=col) old_cell = old_sheet.cell(row=row, column=col)
logging.debug(f'Performance Check, Cell: {col}:{row}') log.debug(f'Performance Check, Cell: {col}:{row}')
if new_cell.value != old_cell.value: if new_cell.value != old_cell.value:
if type(new_cell) is ReadOnlyCell: if type(new_cell) is ReadOnlyCell:
@@ -131,18 +134,18 @@ def compare_files(oldfile, newfile):
else: else:
coord = f'{col}/{row}' coord = f'{col}/{row}'
message = f'Veränderung in Zelle {coord}: {old_cell.value} ==> {new_cell.value}' message = f'Veränderung in Zelle {coord}: {old_cell.value} ==> {new_cell.value}'
logging.info(message) log.info(message)
results.append(message) results.append(message)
new_wb.close() new_wb.close()
old_wb.close() old_wb.close()
if not results: if not results:
logging.info("Keine Änderungen gefunden.") log.info("Keine Änderungen gefunden.")
else: else:
send_email(results, set['email_recipient']) send_email(results, set['email_recipient'])
else: else:
logging.warning('Kann Dateien nicht vergleichen, Dateien nicht vorhanden?') log.warning('Kann Dateien nicht vergleichen, Dateien nicht vorhanden?')
def send_email(results, recipient): def send_email(results, recipient):
@@ -194,11 +197,20 @@ def main():
compare_files(oldfile, newfile) compare_files(oldfile, newfile)
set = load_settings() set = load_settings()
logging.basicConfig( log = logging.getLogger()
level=set['loglevel'], log.setLevel(set['loglevel'])
format='[%(asctime)s] %(levelname)s: %(message)s', log_format = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
filename=set['logfile']
) # handler_stdout = logging.StreamHandler(stream="/proc/1/fd/1")
# handler_stdout.setLevel(set['loglevel'])
# handler_stdout.setFormatter(log_format)
handler_file = logging.FileHandler("/proc/1/fd/1")
handler_file.setLevel(set['loglevel'])
handler_file.setFormatter(log_format)
# log.addHandler(handler_stdout)
log.addHandler(handler_file)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

16
run.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Container entry point: announce start-up, make sure /app/conf contains a
# crontab and a conf.json (copied from /app/init when missing), install the
# crontab, then run cron in the foreground.
# NOTE(review): /app/conf is presumably a mounted volume so that edited
# configuration survives container re-creation - confirm against the compose setup.

# Write one message to the stdout of PID 1.
say() {
    echo "$1" > /proc/1/fd/1
}

# Copy a template into place when the target file does not exist yet.
#   $1  target file
#   $2  template to copy from
#   $3  message to print after the copy
seed_if_missing() {
    if [ ! -f "$1" ]; then
        cp "$2" "$1"
        say "$3"
    fi
}

say "DW Parser Docker Container started!"

seed_if_missing "/app/conf/crontab" "/app/init/crontab" "Crontab not found... Initialized..."
crontab "/app/conf/crontab"

seed_if_missing "/app/conf/conf.json" "/app/init/conf.json" "Conf.json not found... Initialized..."

# -f keeps cron in the foreground so the container stays alive.
cron -f