initial commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
files/
|
||||||
205
parser.py
Normal file
205
parser.py
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from distutils.debug import DEBUG
|
||||||
|
import json, os, smtplib, ssl, logging
|
||||||
|
from openpyxl.cell.read_only import ReadOnlyCell
|
||||||
|
from pyvirtualdisplay import Display
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from datetime import datetime
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
from email.message import EmailMessage
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def load_settings():
    """Load the watchdog configuration from ``settings.json``.

    The file name is relative, so it is resolved against the current
    working directory of the process.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open('settings.json', encoding='utf-8') as file:
        return json.load(file)
|
||||||
|
|
||||||
|
def fetch_via_browser(newfile):
    """Download the shared spreadsheet as XLSX through the OnlyOffice web UI.

    Starts an Xvfb virtual display and a headless Chrome whose download
    directory is ``set['files']``, opens ``set['oo_url']``, switches into
    the editor iframe, opens the "file" tab and clicks the XLSX format
    button. Afterwards it polls for up to 20 seconds until ``newfile``
    exists and logs a warning if it never shows up.

    ``set`` is the module-level settings mapping, not the builtin type.

    Args:
        newfile: Path at which the downloaded workbook is expected to appear.

    Raises:
        Exception: Errors raised while driving the browser (for example a
            Selenium wait timing out) propagate to the caller. The browser
            and the display are shut down first in every case.
    """
    disp = Display(backend="xvfb", size=(800, 600))
    disp.start()

    browser = None
    try:
        options = webdriver.ChromeOptions()
        prefs = {
            "download.default_directory": set['files'],
            "profile.default_content_settings.popups": 0,
        }
        logging.info(prefs)
        options.add_experimental_option("prefs", prefs)
        options.add_argument("--headless=new")
        browser = webdriver.Chrome(options=options)
        wait = WebDriverWait(browser, 10)

        browser.get(set['oo_url'])

        # The editor lives inside an iframe; all further lookups happen there.
        iframe = wait.until(
            EC.element_to_be_clickable((By.TAG_NAME, 'iframe'))
        )
        browser.switch_to.frame(iframe)

        btn_file = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[@data-tab='file']"))
        )
        # Wait for the loading mask to disappear before clicking the tab.
        wait.until(
            EC.invisibility_of_element_located((By.CSS_SELECTOR, 'div.asc-loadmask'))
        )
        btn_file.click()

        # Only wait for the "save as" panel; the element itself is not used.
        wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "div#panel-saveas"))
        )

        btn_download = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "div.svg-format-xlsx"))
        )
        btn_download.click()

        # Poll once per second, at most 20 times, for the downloaded file.
        waited = 0
        while not os.path.exists(newfile) and waited < 20:
            time.sleep(1)
            waited += 1

        # Re-check the file itself instead of the counter, so a download
        # that finished during the last sleep is not reported as a timeout.
        if not os.path.exists(newfile):
            logging.warning("Timeout beim Dateidownload erreicht.")
    finally:
        # Always release Chrome and the virtual display, even when one of
        # the waits above raised. Shutdown problems are logged only, so they
        # cannot mask the original error.
        if browser is not None:
            try:
                browser.quit()
            except Exception:
                logging.exception("Exception raised while closing the browser")
        try:
            disp.stop()
        except Exception:
            logging.exception("Exception raised while stopping the display")
|
||||||
|
|
||||||
|
def rotate_file(oldfile, newfile):
    """Promote the most recent download to be the comparison baseline.

    Moves ``newfile`` over ``oldfile`` in a single step. If ``newfile``
    does not exist (for example because the previous download failed),
    ``oldfile`` is left untouched so the next comparison still has a
    baseline. Failures are logged and never raised.

    Args:
        oldfile: Path of the baseline workbook to be replaced.
        newfile: Path of the workbook downloaded by the previous run.
    """
    try:
        # os.replace overwrites an existing destination, so no separate
        # delete step is needed and the baseline is never removed unless
        # its replacement is actually there.
        os.replace(newfile, oldfile)
    except FileNotFoundError as e:
        logging.warning("Keine neue Datei zum Rotieren vorhanden: %s", e)
    except OSError as e:
        logging.error("Fehler beim Umbenennen")
        logging.error(e)
|
||||||
|
|
||||||
|
def compare_files(oldfile, newfile):
    """Compare the 'BewerberInnen' sheet of two workbooks cell by cell.

    Both workbooks are opened read-only. The scanned rectangle starts at
    the smaller of the two sheets' minimum row/column and ends at the
    larger of their maximum row/column; the settings keys ``max_row`` and
    ``max_col``, when present, replace the respective maximum. Every cell
    whose value differs is logged and collected, and if at least one
    difference was found the list is mailed to ``set['email_recipient']``
    via ``send_email``.

    ``set`` is the module-level settings mapping, not the builtin type.

    Args:
        oldfile: Path of the baseline workbook.
        newfile: Path of the freshly downloaded workbook.

    Returns:
        None. If either file is missing a warning is logged and nothing
        is compared.
    """
    if not (os.path.exists(oldfile) and os.path.exists(newfile)):
        logging.warning('Kann Dateien nicht vergleichen, Dateien nicht vorhanden?')
        return

    results = []

    new_wb = load_workbook(filename=newfile, read_only=True)
    try:
        old_wb = load_workbook(filename=oldfile, read_only=True)
        try:
            new_sheet = new_wb['BewerberInnen']
            old_sheet = old_wb['BewerberInnen']

            old_sheet.calculate_dimension(force=True)
            new_sheet.calculate_dimension(force=True)

            logging.debug(
                'Old-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): %s/%s:%s/%s',
                old_sheet.min_column, old_sheet.max_column,
                old_sheet.min_row, old_sheet.max_row,
            )
            logging.debug(
                'New-Sheet-Dimensions (mincol/maxcol:minrow/maxrow): %s/%s:%s/%s',
                new_sheet.min_column, new_sheet.max_column,
                new_sheet.min_row, new_sheet.max_row,
            )

            global_min_row = min(old_sheet.min_row, new_sheet.min_row)
            global_max_row = max(
                set.get('max_row', old_sheet.max_row),
                set.get('max_row', new_sheet.max_row),
            )
            global_min_col = min(old_sheet.min_column, new_sheet.min_column)
            global_max_col = max(
                set.get('max_col', old_sheet.max_column),
                set.get('max_col', new_sheet.max_column),
            )

            for row in range(global_min_row, global_max_row + 1):
                for col in range(global_min_col, global_max_col + 1):
                    new_cell = new_sheet.cell(row=row, column=col)
                    old_cell = old_sheet.cell(row=row, column=col)

                    # Lazy %-formatting: this runs once per cell, so the
                    # string is only built when DEBUG logging is enabled.
                    logging.debug('Performance Check, Cell: %s:%s', col, row)

                    if new_cell.value == old_cell.value:
                        continue

                    # Take the coordinate from whichever side is a
                    # ReadOnlyCell; otherwise fall back to "col/row".
                    if isinstance(new_cell, ReadOnlyCell):
                        coord = new_cell.coordinate
                    elif isinstance(old_cell, ReadOnlyCell):
                        coord = old_cell.coordinate
                    else:
                        coord = f'{col}/{row}'

                    message = f'Veränderung in Zelle {coord}: {old_cell.value} ==> {new_cell.value}'
                    logging.info(message)
                    results.append(message)
        finally:
            old_wb.close()
    finally:
        # Close the workbooks even if the comparison raised, e.g. because
        # the expected sheet is missing.
        new_wb.close()

    if not results:
        logging.info("Keine Änderungen gefunden.")
    else:
        send_email(results, set['email_recipient'])
|
||||||
|
|
||||||
|
def send_email(results, recipient):
    """Send the list of detected changes as a plain-text + HTML e-mail.

    The message is sent from ``set['smtp_user']`` through
    ``set['smtp_server']``/``set['smtp_port']`` using STARTTLS and a login
    with ``set['smtp_user']``/``set['smtp_password']``. The HTML part
    additionally links to ``set['oo_url']``.

    ``set`` is the module-level settings mapping, not the builtin type.

    Args:
        results: Iterable of message lines. A single string is accepted
            and treated as one line.
        recipient: Value for the ``To`` header.

    Raises:
        smtplib.SMTPException: If the SMTP conversation fails.
        OSError: If the connection to the server cannot be established.
    """
    # Local import: html is needed only here and is not imported at file level.
    import html

    # A bare string would otherwise be joined character by character.
    if isinstance(results, str):
        results = [results]
    lines = [str(entry) for entry in results]

    msg = EmailMessage()
    msg['Subject'] = 'ISB-Vermittlung Watchdog'
    msg['From'] = set['smtp_user']
    msg['To'] = recipient

    # Plain-text part; add_alternative below turns the message into
    # multipart/alternative with the HTML version as second part.
    msg.set_content('\r\n'.join(lines))

    # The lines contain spreadsheet cell values, so escape them before
    # embedding them in markup.
    link = html.escape(set['oo_url'], quote=True)
    body_html = '<br />'.join(html.escape(line) for line in lines)
    html_msg = f"""\
<html>
    <body>
        <a href="{link}">Zur Vermittlungsliste des DW</a>
        <br />
        <br />
        {body_html}
    </body>
</html>
"""
    msg.add_alternative(html_msg, subtype="html")

    context = ssl.create_default_context()
    # The port may be stored as a string in the JSON settings; normalise it.
    with smtplib.SMTP(set['smtp_server'], int(set['smtp_port'])) as server:
        server.starttls(context=context)
        server.login(set['smtp_user'], set['smtp_password'])
        server.send_message(msg)
        # No explicit quit(): leaving the with-block sends QUIT.
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point.

    Modes, selected by a single command-line argument:

    * ``-t``: send a test e-mail and do nothing else.
    * ``-c``: compare the two workbooks already on disk.
    * anything else (including no argument): rotate the previous download
      to the baseline, fetch a fresh copy and compare the two.

    ``set`` is the module-level settings mapping, not the builtin type.
    """
    args = sys.argv[1:]

    oldfile = os.path.join(set['files'], 'vermittlung-old.xlsx')
    newfile = os.path.join(set['files'], 'ISB Vermittlung.xlsx')

    mode = args[0] if len(args) == 1 else None

    if mode == '-t':
        # Pass a list: send_email joins its argument line by line.
        send_email(["test"], "marc-pascal.koenig@outlook.de")
    elif mode == '-c':
        compare_files(oldfile, newfile)
    else:
        rotate_file(oldfile, newfile)
        fetch_via_browser(newfile)
        compare_files(oldfile, newfile)
|
||||||
|
|
||||||
|
# Module-level bootstrap: the settings are loaded at import time because the
# functions above read them through the global name `set` (which shadows the
# builtin of the same name inside this module).
set = load_settings()

# Log to the configured file with the configured level.
logging.basicConfig(
    filename=set['logfile'],
    format='[%(asctime)s] %(levelname)s: %(message)s',
    level=set['loglevel'],
)

if __name__ == '__main__':
    main()
|
||||||
15
settings.json
Normal file
15
settings.json
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"oo_url": "https://cloud.diakonie-hamburg.de/index.php/apps/onlyoffice/s/EnjlEAC3pBX1gZn?fileId=5267",
|
||||||
|
"logfile": "/var/log/dw-parser/dw-parser.log",
|
||||||
|
"files": "/home/socrates/dw-parser/files/",
|
||||||
|
"loglevel": "WARNING",
|
||||||
|
|
||||||
|
"smtp_server": "smtp.gmail.com",
|
||||||
|
"smtp_port": "587",
|
||||||
|
"smtp_password": "gkrybqzhzkkggbrh",
|
||||||
|
"smtp_user": "sockenklaus@gmail.com",
|
||||||
|
"email_recipient": "behr@bodelschwingh.com, koenig@bodelschwingh.com",
|
||||||
|
|
||||||
|
"max_col": 100,
|
||||||
|
"__max_row": 25
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user