Compare commits

...

10 Commits

Author SHA1 Message Date
socrates@bakunin
9ea1623c64 Made parser.py executable 2024-02-27 10:40:22 +01:00
f806eeb5fb streamlined logging a little bit more. 2023-12-13 01:36:11 +01:00
socrates
6ede613642 scripts now log to process 1 and therefore to docker logs 2023-12-13 00:54:44 +01:00
socrates
c19896351a added logging to stdout 2023-12-12 15:01:36 +01:00
socrates
fd4d56e667 changed a little bit about logging 2023-12-12 13:15:30 +01:00
socrates
5cc27ca6b9 changed docker for better persistence 2023-12-12 12:36:36 +01:00
socrates
db51753ff9 changed config to production 2023-12-12 11:57:51 +01:00
socrates
e954547b67 fixed dockerfile 2023-12-12 11:54:37 +01:00
socrates
d27a6f1210 added crontab 2023-12-11 13:53:04 +01:00
socrates
f242ef1681 first docker functionality 2023-12-11 12:53:21 +01:00
7 changed files with 88 additions and 25 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
files/
logs/

31
Dockerfile Normal file
View File

@@ -0,0 +1,31 @@
# Image for the DW parser: Debian with cron, tzdata and the Python libraries
# installed below, plus the parser scripts under /app.
FROM debian:bookworm-slim

# Keep Python output unbuffered so log lines are written out immediately.
ENV PYTHONUNBUFFERED=1
# Build-time only: stop apt/debconf from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

# apt-get is used instead of apt because apt's command-line interface is not
# intended for scripted use. Recommended packages are deliberately left enabled.
# NOTE(review): no browser is installed explicitly here; presumably it arrives
# as a recommended package of python3-selenium - confirm before adding
# --no-install-recommends.
RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get -y install \
        vim \
        cron \
        tzdata \
        python3-openpyxl \
        python3-selenium \
        python3-pyvirtualdisplay \
    && rm -rf /var/lib/apt/lists/*

# Set the container time zone to Europe/Berlin. A symlink is used rather than
# cp: if /etc/localtime already is a symlink, cp would write through it and
# overwrite the zoneinfo file it points to.
RUN ln -snf /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && echo "Europe/Berlin" > /etc/timezone

# Application scripts and the default configuration templates.
COPY /parser.py /app/
COPY /kill-chrome.sh /app/
COPY /run.sh /app/
COPY /init /app/init

# Working directories used at runtime, and executable bits for the scripts.
RUN mkdir -p /app/files /app/log /app/conf \
    && chmod +x /app/run.sh /app/kill-chrome.sh /app/parser.py

CMD [ "/app/run.sh" ]

View File

@@ -1,8 +1,8 @@
{
"oo_url": "https://cloud.diakonie-hamburg.de/index.php/apps/onlyoffice/s/EnjlEAC3pBX1gZn?fileId=5267",
"logfile": "/var/log/dw-parser/dw-parser.log",
"files": "/home/socrates/dw-parser/files/",
"loglevel": "WARNING",
"logfile": "log/dw-parser.log",
"files": "files/",
"loglevel": "INFO",
"smtp_server": "smtp.gmail.com",
"smtp_port": "587",

3
init/crontab Normal file
View File

@@ -0,0 +1,3 @@
# m h dom mon dow cmd
# Every 5 minutes from 08:00 to 20:55, Monday to Friday: run the parser from
# /app and send stdout and stderr to PID 1's stdout.
# The portable "> file 2>&1" form is required: cron runs jobs with /bin/sh,
# where "&>" is not a redirection (it backgrounds the command instead).
*/5 8-20 * * 1-5 cd /app/ && ./parser.py > /proc/1/fd/1 2>&1
# Every day at midnight: run the kill-chrome script.
0 0 * * * /app/kill-chrome.sh

0
kill-chrome → kill-chrome.sh Executable file → Normal file
View File

56
parser.py Executable file → Normal file
View File

@@ -14,7 +14,7 @@ import time
import sys
def load_settings():
with open('settings.json') as file:
with open('conf/conf.json') as file:
return json.load(file)
def fetch_via_browser(newfile):
@@ -27,9 +27,12 @@ def fetch_via_browser(newfile):
"download.default_directory" : set['files'],
"profile.default_content_settings.popups" : 0,
}
logging.info(prefs)
log.debug("CHROMIUM PREFS:")
log.debug(prefs)
options.add_experimental_option("prefs", prefs)
options.add_argument("--headless=new")
options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
browser = webdriver.Chrome(options=options)
wait = WebDriverWait(browser, 10)
@@ -67,32 +70,32 @@ def fetch_via_browser(newfile):
timeout += 1
if timeout == 20:
logging.warning("Timeout beim Dateidownload erreicht.")
log.warning("Timeout beim Dateidownload erreicht.")
browser.quit()
disp.stop()
except:
logging.error("Exception raised: ",sys.exc_info())
log.error("Exception raised: ",sys.exc_info())
def rotate_file(oldfile, newfile):
try:
os.remove(oldfile)
except Exception as e:
logging.error("Fehler beim Löschen")
log.error("Fehler beim Löschen")
if hasattr(e, 'message'):
logging.error(e.message)
log.error(e.message)
else:
logging.error(e)
log.error(e)
try:
os.rename(newfile, oldfile)
except Exception as e:
logging.error("Fehler beim Umbenennen")
log.error("Fehler beim Umbenennen")
if hasattr(e, 'message'):
logging.error(e.message)
log.error(e.message)
else:
logging.error(e)
log.error(e)
def compare_files(oldfile, newfile):
results = []
@@ -108,8 +111,8 @@ def compare_files(oldfile, newfile):
old_sheet.calculate_dimension(force=True)
new_sheet.calculate_dimension(force=True)
logging.debug(f'Old-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): {old_sheet.min_column}/{old_sheet.max_column}:{old_sheet.min_row}/{old_sheet.max_row}')
logging.debug(f'New-Sheet-Dimensions (mincol/maxcolminrow/maxrow): {new_sheet.min_column}/{new_sheet.max_column}:{new_sheet.min_row}/{new_sheet.max_row}')
log.debug(f'Old-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): {old_sheet.min_column}/{old_sheet.max_column}:{old_sheet.min_row}/{old_sheet.max_row}')
log.debug(f'New-Sheet-Dimensions (mincol/maxcolminrow/maxrow): {new_sheet.min_column}/{new_sheet.max_column}:{new_sheet.min_row}/{new_sheet.max_row}')
global_min_row = min(old_sheet.min_row, new_sheet.min_row)
global_max_row = max(set.get('max_row', old_sheet.max_row), set.get('max_row', new_sheet.max_row))
@@ -121,7 +124,7 @@ def compare_files(oldfile, newfile):
new_cell = new_sheet.cell(row=row, column=col)
old_cell = old_sheet.cell(row=row, column=col)
logging.debug(f'Performance Check, Cell: {col}:{row}')
log.debug(f'Performance Check, Cell: {col}:{row}')
if new_cell.value != old_cell.value:
if type(new_cell) is ReadOnlyCell:
@@ -131,18 +134,18 @@ def compare_files(oldfile, newfile):
else:
coord = f'{col}/{row}'
message = f'Veränderung in Zelle {coord}: {old_cell.value} ==> {new_cell.value}'
logging.info(message)
log.info(message)
results.append(message)
new_wb.close()
old_wb.close()
if not results:
logging.info("Keine Änderungen gefunden.")
log.info("Keine Änderungen gefunden.")
else:
send_email(results, set['email_recipient'])
else:
logging.warning('Kann Dateien nicht vergleichen, Dateien nicht vorhanden?')
log.warning('Kann Dateien nicht vergleichen, Dateien nicht vorhanden?')
def send_email(results, recipient):
@@ -194,11 +197,20 @@ def main():
compare_files(oldfile, newfile)
set = load_settings()
logging.basicConfig(
level=set['loglevel'],
format='[%(asctime)s] %(levelname)s: %(message)s',
filename=set['logfile']
)
log = logging.getLogger()
log.setLevel(set['loglevel'])
log_format = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
# handler_stdout = logging.StreamHandler(stream="/proc/1/fd/1")
# handler_stdout.setLevel(set['loglevel'])
# handler_stdout.setFormatter(log_format)
handler_file = logging.FileHandler("/proc/1/fd/1")
handler_file.setLevel(set['loglevel'])
handler_file.setFormatter(log_format)
# log.addHandler(handler_stdout)
log.addHandler(handler_file)
if __name__ == '__main__':
main()

16
run.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Container entrypoint: seed /app/conf from the templates in /app/init when
# the files are missing, install the crontab, then run cron in the foreground.

# Write a message to the stdout of PID 1.
log() {
    echo "$1" > /proc/1/fd/1
}

log "DW Parser Docker Container started!"

# Seed the crontab on first start; an existing /app/conf/crontab is kept.
if ! test -f "/app/conf/crontab"; then
    cp "/app/init/crontab" "/app/conf/crontab"
    log "Crontab not found... Initialized..."
fi
crontab "/app/conf/crontab"

# Seed the parser configuration on first start; an existing file is kept.
if ! test -f "/app/conf/conf.json"; then
    cp "/app/init/conf.json" "/app/conf/conf.json"
    log "Conf.json not found... Initialized..."
fi

# exec replaces this shell with cron, so cron itself becomes PID 1 and
# receives the container's signals directly instead of a wrapping bash.
exec cron -f