Added Export to Excel, uploadable to Outlands cloud with share link
This commit is contained in:
parent
c94fa528fe
commit
f5c34d9b0f
|
|
@ -1,12 +1,18 @@
|
||||||
from typing import List, Union, Generator, Iterator, Dict
|
from typing import List, Union, Generator, Iterator
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel, Field
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
import pylightxl as xl
|
from urllib.parse import urlencode
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
from openpyxl.styles import Alignment, Font
|
||||||
|
from openpyxl.drawing.image import Image
|
||||||
|
from copy import copy
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
|
import ast
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
@ -23,12 +29,23 @@ class Pipeline:
|
||||||
DB_DATABASE: str
|
DB_DATABASE: str
|
||||||
DB_TABLES: List[str]
|
DB_TABLES: List[str]
|
||||||
|
|
||||||
XL_TEMPLATE_PATH: str
|
KEYSTORE_URL: str
|
||||||
|
KEYSTORE_API: str
|
||||||
|
|
||||||
|
CLOUD_HOST: str
|
||||||
|
CLOUD_API_VERSION: str
|
||||||
|
CLOUD_EXCEL_LIBRARY: str
|
||||||
|
CLOUD_USER: str
|
||||||
|
CLOUD_PASS: str
|
||||||
|
CLOUD_EXCEL_TEMPLATE_FILENAME: str
|
||||||
|
|
||||||
|
TMP_STORAGE_DIR: str
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.name = "ØMS Membership Database"
|
self.name = "01 PolarPress Database RAG"
|
||||||
self.engine = None
|
self.engine = None
|
||||||
self.nlsql_response = ""
|
self.nlsql_response = ""
|
||||||
|
self.last_emit_time = 0
|
||||||
self.valves = self.Valves(
|
self.valves = self.Valves(
|
||||||
**{
|
**{
|
||||||
"pipelines": ["*"],
|
"pipelines": ["*"],
|
||||||
|
|
@ -41,7 +58,15 @@ class Pipeline:
|
||||||
"DB_PASSWORD": os.getenv("DB_PASSWORD", "YOUR_PASSWORD"),
|
"DB_PASSWORD": os.getenv("DB_PASSWORD", "YOUR_PASSWORD"),
|
||||||
"DB_DATABASE": os.getenv("DB_DATABASE", "pp_polarpress_demo_prod"),
|
"DB_DATABASE": os.getenv("DB_DATABASE", "pp_polarpress_demo_prod"),
|
||||||
"DB_TABLES": ["users", "club_memberships", "stripe_transactions", "vipps_transactions"],
|
"DB_TABLES": ["users", "club_memberships", "stripe_transactions", "vipps_transactions"],
|
||||||
"XL_TEMPLATE_PATH": os.getenv("XL_TEMPLATE_PATH", "/var/support/openwebui/xl_templates/oms_dataexport.xlsx")
|
"KEYSTORE_URL": os.getenv("KEYSTORE_URL", "https://keystore.outlands.no"),
|
||||||
|
"KEYSTORE_API": os.getenv("KEYSTORE_API", "123"),
|
||||||
|
"CLOUD_HOST": os.getenv("CLOUD_HOST", "outlands.no"),
|
||||||
|
"CLOUD_API_VERSION": os.getenv("CLOUD_API_VERSION", "api2"),
|
||||||
|
"CLOUD_EXCEL_LIBRARY": os.getenv("CLOUD_EXCEL_LIBRARY", "exports"),
|
||||||
|
"CLOUD_USER": os.getenv("CLOUD_USER", "username"),
|
||||||
|
"CLOUD_PASS": os.getenv("CLOUD_PASS", "password"),
|
||||||
|
"CLOUD_EXCEL_TEMPLATE_FILENAME": os.getenv("CLOUD_EXCEL_TEMPLATE_FILENAME", "oms_eksportmal.xls"),
|
||||||
|
"TMP_STORAGE_DIR": os.getenv("TMP_STORAGE_DIR", "/home/heno/tmp"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -56,10 +81,181 @@ class Pipeline:
|
||||||
|
|
||||||
async def on_startup(self):
|
async def on_startup(self):
|
||||||
self.init_db()
|
self.init_db()
|
||||||
|
await self.sea_login()
|
||||||
|
await self.sea_check_library()
|
||||||
|
await self.sea_link()
|
||||||
|
|
||||||
async def on_shutdown(self):
|
async def on_shutdown(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def sea_login(self):
|
||||||
|
try:
|
||||||
|
headers= {
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
|
}
|
||||||
|
data = urlencode({'username': self.valves.CLOUD_USER, 'password': self.valves.CLOUD_PASS})
|
||||||
|
|
||||||
|
response = requests.post(
|
||||||
|
f"https://{self.valves.CLOUD_HOST}/{self.valves.CLOUD_API_VERSION}/auth-token/",
|
||||||
|
data=data,
|
||||||
|
headers=headers
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logging.error("Failed to decode json response connecting to the cloud")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if 'non_field_errors' in data:
|
||||||
|
if data['non_field_errors'][0] == 'Unable to login with provided credentials.':
|
||||||
|
logging.error(f"Invalid username or password for user {self.valves.CLOUD_USER} on seafile host {self.valves.CLOUD_HOST} provided. Could not get auth token")
|
||||||
|
return False
|
||||||
|
elif 'token' in data:
|
||||||
|
self.token = data['token']
|
||||||
|
print(f"Login to seafile cloud host {self.valves.CLOUD_HOST} successful")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logging.error("Unexpected response from seafile server")
|
||||||
|
return False
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logging.error(f"Unable to connect to seafile cloud {self.valves.CLOUD_HOST}. Error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def sea_check_library(self) -> bool:
|
||||||
|
try:
|
||||||
|
headers = {
|
||||||
|
'Authorization': f"Token {self.token}",
|
||||||
|
'Accept': 'application/json; indent=4'
|
||||||
|
}
|
||||||
|
|
||||||
|
params = {
|
||||||
|
'type': 'mine'
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.get(
|
||||||
|
f"https://{self.valves.CLOUD_HOST}/{self.valves.CLOUD_API_VERSION}/repos/",
|
||||||
|
headers=headers,
|
||||||
|
params=params
|
||||||
|
)
|
||||||
|
|
||||||
|
res = response.json()
|
||||||
|
library_exists = False
|
||||||
|
library_id = None
|
||||||
|
writable = True
|
||||||
|
|
||||||
|
for library in res:
|
||||||
|
if library['name'] == self.valves.CLOUD_EXCEL_LIBRARY:
|
||||||
|
library_exists = True
|
||||||
|
library_id = library['id']
|
||||||
|
if library.get('encrypted') or library.get('permission') == 'r':
|
||||||
|
writable = False
|
||||||
|
|
||||||
|
if not writable:
|
||||||
|
logging.error(f"The library {self.valves.CLOUD_EXCEL_LIBRARY} exists but is is not writable. Log in to {self.valves.CLOUD_HOST} from your browser and check it's permission settings")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not library_exists:
|
||||||
|
make_library_response = requests.post(
|
||||||
|
f"https://{self.valves.CLOUD_HOST}/{self.valves.CLOUD_API_VERSION}/repos/",
|
||||||
|
headers=headers,
|
||||||
|
json={
|
||||||
|
'name': self.valves.CLOUD_EXCEL_LIBRARY
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if make_library_response.status_code == 400:
|
||||||
|
logging.error(f"Could not create neccessary library {self.valves.CLOUD_EXCEL_LIBRARY} on your {self.valves.CLOUD_HOST} account. Contact a system administrator")
|
||||||
|
return False
|
||||||
|
elif make_library_response.status_code == 520:
|
||||||
|
logging.error(f"Could not create neccessary library {self.valves.CLOUD_EXCEL_LIBRARY} on your {self.valves.CLOUD_HOST} account. Contact a system administrator")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.library_id = make_library_response.json()['repo_id']
|
||||||
|
print(f"The library {self.valves.CLOUD_EXCEL_LIBRARY} was created", fg='yellow')
|
||||||
|
return True
|
||||||
|
|
||||||
|
self.library_id = library_id
|
||||||
|
print(f"The library {self.valves.CLOUD_EXCEL_LIBRARY} exists, no need to create")
|
||||||
|
return True
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logging.error(f"Unable to connect to cloud {self.valves.CLOUD_HOST}. Error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def sea_link(self) -> bool:
|
||||||
|
try:
|
||||||
|
response = requests.get(
|
||||||
|
f"https://{self.valves.CLOUD_HOST}/{self.valves.CLOUD_API_VERSION}/repos/{self.library_id}/upload-link/?p=/",
|
||||||
|
headers={'Authorization': 'Token {token}'.format(token=self.token)}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == 403:
|
||||||
|
logging.error(f"Lacking permissions to upload to library '{self.library_id}'. Doublecheck permissions by logging into https://{self.valves.CLOUD_HOST} and go to the library to check it's settings")
|
||||||
|
return False
|
||||||
|
elif response.status_code == 500:
|
||||||
|
logging.error(f"Could not get permission to upload. Check that you have enough storage permissions left on your {self.valves.CLOUD_HOST} account, and if not try to delete some old files first.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.upload_link = response.json()
|
||||||
|
print(f"Recieved go ahead for upload on {self.valves.CLOUD_HOST}")
|
||||||
|
return True
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logging.error(f"Unable to connect to cloud {self.valves.CLOUD_HOST}. Error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def sea_upload(self, path: str, file_name: str) -> bool:
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
print(f"Cannot read and upload '{path}'")
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
requests.post(
|
||||||
|
self.upload_link, data={'filename': file_name, 'parent_dir': '/'},
|
||||||
|
files={'file': open(path, 'rb')},
|
||||||
|
headers={'Authorization': 'Token {token}'.format(token=self.token)}
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"Excel file '{file_name}' successfully uploaded and stored to your {self.valves.CLOUD_HOST}")
|
||||||
|
return True
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logging.error(f"Unable to upload file to {self.valves.CLOUD_HOST}. Error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def sea_share(self, file: str):
|
||||||
|
try:
|
||||||
|
headers = {
|
||||||
|
'Authorization': f'Token {self.token}',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Accept': 'application/json; indent=4'
|
||||||
|
}
|
||||||
|
|
||||||
|
json_data = {
|
||||||
|
'repo_id': self.library_id,
|
||||||
|
'path': f'/{file}',
|
||||||
|
'permissions': {
|
||||||
|
'can_edit': False,
|
||||||
|
'can_download': True
|
||||||
|
},
|
||||||
|
'expire_days': 1
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post(
|
||||||
|
f"https://{self.valves.CLOUD_HOST}/api/v2.1/share-links/",
|
||||||
|
json=json_data,
|
||||||
|
headers=headers
|
||||||
|
)
|
||||||
|
|
||||||
|
res = response.json()
|
||||||
|
|
||||||
|
if response.status_code < 300:
|
||||||
|
print(f"Excel file '{file}' successfully shared: {res['link']}")
|
||||||
|
return res["link"]
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logging.error(f"Unable to share {file}. Error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
def run_llm_query(self, message: str):
|
def run_llm_query(self, message: str):
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
|
|
@ -103,10 +299,40 @@ class Pipeline:
|
||||||
with self.engine.connect() as connection:
|
with self.engine.connect() as connection:
|
||||||
result = connection.execute(text(query))
|
result = connection.execute(text(query))
|
||||||
rows = result.fetchall()
|
rows = result.fetchall()
|
||||||
return str(rows)
|
return rows
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
def give_excel_book_description(self, message: str, query: str, result: str):
|
||||||
|
llm_instructions = f"""
|
||||||
|
Based on the input question, MySQL query and query result, give a nice text summary in NORWEGIAN
|
||||||
|
of the data you have extracted from the database. The text will be used in a Excel spreadsheet export
|
||||||
|
|
||||||
|
Input question {message}
|
||||||
|
MySQLQuery: {query}
|
||||||
|
Query result: {result}
|
||||||
|
"""
|
||||||
|
|
||||||
|
return llm_instructions
|
||||||
|
|
||||||
|
def generate_table_headers(self, message: str, query: str, result: str):
|
||||||
|
llm_instructions = f"""
|
||||||
|
Based on the input question, MySQL query and query result, give the columns in Query result the appropriate
|
||||||
|
header names translated to NORWEGIAN.
|
||||||
|
|
||||||
|
IMPORTANT: Return a python list formatted as string with ONLY the column header names, not the resut of the query result.
|
||||||
|
DO NOT encapsulate your reply in markdown or any other formatting beside the python list formatted as a string.
|
||||||
|
Example of valid string to generate: ["ID", "Navn", "Epostadresse", "Dato opprettet"]
|
||||||
|
|
||||||
|
Input question {message}
|
||||||
|
MySQLQuery: {query}
|
||||||
|
Query result: {result}
|
||||||
|
|
||||||
|
Python list formatted string of table header names for columns as they appear in "Query result":
|
||||||
|
"""
|
||||||
|
|
||||||
|
return llm_instructions
|
||||||
|
|
||||||
def reformat_data(self, message: str, query: str, result: str):
|
def reformat_data(self, message: str, query: str, result: str):
|
||||||
llm_reformat_instructions = f"""
|
llm_reformat_instructions = f"""
|
||||||
Given an input question, create a syntactically correct mysql query to run. You have 4 tables to work with:
|
Given an input question, create a syntactically correct mysql query to run. You have 4 tables to work with:
|
||||||
|
|
@ -178,22 +404,118 @@ class Pipeline:
|
||||||
MySQLQuery:
|
MySQLQuery:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Vi sjekker om body['stream'] er sann som indikerer at pipelinen kjører brukerens forespørsel
|
||||||
|
if body['stream']:
|
||||||
|
|
||||||
|
# Vi kjører første inferens med instruksjonene i llm_initial_instructions for å fortelle
|
||||||
|
# KI modellen at vi ønsker en SQL spørring til svar basert på user_message (det brukeren
|
||||||
|
# skrev/spurte i chatten)
|
||||||
initial = self.run_llm_query(llm_initial_instructions)
|
initial = self.run_llm_query(llm_initial_instructions)
|
||||||
|
|
||||||
|
# Hvis inferensen er vellykket, fortsett til neste trinn
|
||||||
if initial["success"]:
|
if initial["success"]:
|
||||||
|
|
||||||
|
# Query inneholder SQL spørringen som er generert av KI modellen
|
||||||
query = initial["data"]
|
query = initial["data"]
|
||||||
|
|
||||||
|
# Vi kjører SQL spørrringen og lagrer resultatet og rådataene i en python toupple datasett
|
||||||
query_result = self.run_mysql_query(query)
|
query_result = self.run_mysql_query(query)
|
||||||
|
|
||||||
|
# Instruer KI om å generere en beskrivelse for regnearket, samt kolonnenavn
|
||||||
|
instruction_result = self.run_llm_query(
|
||||||
|
self.give_excel_book_description(user_message, query, str(query_result))
|
||||||
|
)
|
||||||
|
|
||||||
|
reply_description = instruction_result["data"]
|
||||||
|
|
||||||
|
instruction_result = self.run_llm_query(
|
||||||
|
self.generate_table_headers(user_message, query, str(query_result))
|
||||||
|
)
|
||||||
|
|
||||||
|
header_names = instruction_result["data"]
|
||||||
|
|
||||||
|
# Legg til dataene i excel arket
|
||||||
|
original_file_path = f"{self.valves.TMP_STORAGE_DIR}/{self.valves.CLOUD_EXCEL_TEMPLATE_FILENAME}"
|
||||||
|
excel_start_row = 4
|
||||||
|
wb = load_workbook(original_file_path)
|
||||||
|
ws = wb.active
|
||||||
|
|
||||||
|
# Kopier logoen til ØMS til det nye regnearket
|
||||||
|
img = Image(f"{self.valves.TMP_STORAGE_DIR}/oms-glow.png")
|
||||||
|
img.height = 145
|
||||||
|
img.width = 139
|
||||||
|
img.anchor = 'A1'
|
||||||
|
ws.add_image(img)
|
||||||
|
|
||||||
|
|
||||||
|
# Sett overskrift
|
||||||
|
headline = "Østfold Milsim KI dataeksport"
|
||||||
|
ws.merge_cells('B1:E1')
|
||||||
|
cell = ws.cell(row=1, column=2, value=headline)
|
||||||
|
cell.alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
|
||||||
|
cell.font = Font(size=22, bold=True)
|
||||||
|
|
||||||
|
# Siden reply_description er en forhåndsvis stort volum med tekst, juster stilen og egenskapene til cellen
|
||||||
|
ws.merge_cells('A2:E2')
|
||||||
|
cell = ws.cell(row=2,column=1, value=reply_description)
|
||||||
|
cell.alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
|
||||||
|
cell.font = Font(size=10)
|
||||||
|
ws.row_dimensions[2].height = 300
|
||||||
|
ws.row_dimensions[1].height = 145
|
||||||
|
|
||||||
|
header_list = ast.literal_eval(header_names)
|
||||||
|
for i, row_data in enumerate(header_list, start=1):
|
||||||
|
cell = ws.cell(row=3, column=i, value=row_data)
|
||||||
|
cell.font = Font(bold=True)
|
||||||
|
|
||||||
|
for i, row_data in enumerate(query_result, start=excel_start_row):
|
||||||
|
for j, value in enumerate(row_data, start=1): # Column A corresponds to index 1
|
||||||
|
ws.cell(row=i, column=j, value=value)
|
||||||
|
|
||||||
|
# Lagre og last opp excel arket til skyen
|
||||||
|
timestamp = int(time.time() * 1000) # Datotidsstempel i UNIX tid med millisekunder for å gi filenavnet et unikt navn
|
||||||
|
new_file_name = f"ny_eksport_{timestamp}.xlsx"
|
||||||
|
new_file_path = f'{self.valves.TMP_STORAGE_DIR}/{new_file_name}'
|
||||||
|
wb.save(new_file_path)
|
||||||
|
self.sea_upload(new_file_path, new_file_name)
|
||||||
|
|
||||||
|
# Instruer skyen om å opprette en deling av filen
|
||||||
|
share_file = self.sea_share(new_file_name)
|
||||||
|
|
||||||
|
# Hvis det oppstod en syntaksfeil eller annen feil med spørringen, returner feilen i chatten
|
||||||
if isinstance(query_result, dict) and "error" in query_result:
|
if isinstance(query_result, dict) and "error" in query_result:
|
||||||
return f"Error occurred: {query_result['error']}. Initial data: {initial['data']}"
|
return f"Error occurred: {query_result['error']}. Initial data: {initial['data']}"
|
||||||
|
|
||||||
formatted = self.reformat_data(user_message, query, query_result)
|
# formatted vil inneholde andre trinns instruksjoner for KI modellen, inkludert:
|
||||||
|
# 1. user_message - brukerens innledende forespørsel til KI modellen
|
||||||
|
# 2. query - SQL spørringen KI modellen genererte i første inferens trinn
|
||||||
|
# 3. query_result - Rådata fra databasen
|
||||||
|
formatted = self.reformat_data(user_message, query, str(query_result))
|
||||||
|
|
||||||
|
# Vi kjører andre inferens med andre trinns instruksjoner for KI modellen
|
||||||
formatted_result = self.run_llm_query(formatted)
|
formatted_result = self.run_llm_query(formatted)
|
||||||
|
|
||||||
|
# Data vil inneholde KI modellens svar fra andre inferens trinn
|
||||||
data = formatted_result["data"]
|
data = formatted_result["data"]
|
||||||
|
|
||||||
|
# Legg ved linken til excel arket i chatten
|
||||||
|
if share_file:
|
||||||
|
data += f"\n\n## Eksport:\nI tilfelle resultatet fra spørringen overgår begrensningene i antall tokens i svaret, har jeg eksportert dataene du spurte om i et eget excel ark hvor du kan se hele datasettet. Du kan laste ned Excel arket her:\n1. [Regneark delelink på outlands.no]({share_file})"
|
||||||
|
|
||||||
|
# Hvis resultatet er vellykket returner andre inferens trinn svar i chatten
|
||||||
if formatted_result["success"]:
|
if formatted_result["success"]:
|
||||||
return data
|
return data
|
||||||
|
|
||||||
return f"Error occured: {data}"
|
return f"Error occured: {data}"
|
||||||
|
|
||||||
|
# Hvis første trinn av inferensen ikke var velykket, returner feilmelding i chatten
|
||||||
else:
|
else:
|
||||||
data = initial["data"]
|
data = initial["data"]
|
||||||
return f"Error occured: {data}"
|
return f"Error occured: {data}"
|
||||||
|
|
||||||
|
# Hvis body['stream'] ikke er sann, indikerer det at open webui kjører inferens for å generere
|
||||||
|
# tittel for chatten, eller autocomplete. I så tilfelle vil vi bare kjøre inferens på
|
||||||
|
# brukerens melding og ikke selve databaseforespørselen
|
||||||
|
else:
|
||||||
|
response = self.run_llm_query(user_message)
|
||||||
|
return response["data"]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user