Merge 44578649c2 into af6d4c2711
commit
205618891f
|
|
@ -158,3 +158,5 @@ cython_debug/
|
|||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
.DS_Store
|
||||
|
|
|
|||
16
Dockerfile
16
Dockerfile
|
|
@ -1,11 +1,21 @@
|
|||
FROM python:3.11.6-alpine
|
||||
FROM python:3.12.2-slim-bookworm
|
||||
|
||||
# Install system dependencies required for Python packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
libffi-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apk add --no-cache build-base libffi-dev
|
||||
RUN pip install poetry
|
||||
|
||||
WORKDIR /mnt
|
||||
COPY pyproject.toml poetry.lock .
|
||||
|
||||
# Copy only the files needed for the poetry installation to avoid cache invalidation
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
RUN poetry install --no-root --only main
|
||||
|
||||
# Copy the application
|
||||
COPY . .
|
||||
|
||||
ENTRYPOINT ["poetry", "run", "python", "discollama.py"]
|
||||
|
|
|
|||
11
compose.yaml
11
compose.yaml
|
|
@ -19,3 +19,14 @@ services:
|
|||
- /data
|
||||
ports:
|
||||
- 6379
|
||||
|
||||
chroma:
|
||||
image: ghcr.io/chroma-core/chroma:latest
|
||||
volumes:
|
||||
- index_data:/chroma/.chroma/index
|
||||
ports:
|
||||
- 8000:8000
|
||||
|
||||
volumes:
|
||||
index_data:
|
||||
driver: local
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -6,6 +6,7 @@ import argparse
|
|||
from datetime import datetime, timedelta
|
||||
|
||||
import ollama
|
||||
import chromadb
|
||||
import discord
|
||||
import redis
|
||||
|
||||
|
|
@ -46,11 +47,12 @@ class Response:
|
|||
|
||||
|
||||
class Discollama:
|
||||
def __init__(self, ollama, discord, redis, model):
|
||||
def __init__(self, ollama, discord, redis, model, collection):
|
||||
self.ollama = ollama
|
||||
self.discord = discord
|
||||
self.redis = redis
|
||||
self.model = model
|
||||
self.collection = collection
|
||||
|
||||
# register event handlers
|
||||
self.discord.event(self.on_ready)
|
||||
|
|
@ -100,6 +102,29 @@ class Discollama:
|
|||
reference_message.content,
|
||||
]
|
||||
)
|
||||
|
||||
# retrieve relevant context from vector store
|
||||
knowledge = self.collection.query(
|
||||
query_texts=[content],
|
||||
n_results=2
|
||||
)
|
||||
# directly unpack the first list of documents if it exists, or use an empty list
|
||||
documents = knowledge.get('documents', [[]])[0]
|
||||
|
||||
content = '\n'.join(
|
||||
[
|
||||
'Using the provided document, answer the user question to the best of your ability. You must try to use information from the provided document. Combine information in the document into a coherent answer.',
|
||||
'If there is nothing in the document relevant to the user question, say \'Hmm, I don\'t know about that, try referencing the docs.\', before providing any other information you know.',
|
||||
'Anything between the following `document` html blocks is retrieved from a knowledge bank, not part of the conversation with the user.',
|
||||
'<document>',
|
||||
'\n'.join(documents) if documents else '',
|
||||
'</document>',
|
||||
'Anything between the following `user` html blocks is part of the conversation with the user.',
|
||||
'<user>',
|
||||
content,
|
||||
'</user>',
|
||||
]
|
||||
)
|
||||
|
||||
if not context:
|
||||
context = await self.load(channel_id=channel.id)
|
||||
|
|
@ -152,10 +177,40 @@ class Discollama:
|
|||
def run(self, token):
|
||||
try:
|
||||
self.discord.run(token)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logging.exception("An error occurred while running the bot: %s", e)
|
||||
self.redis.close()
|
||||
|
||||
|
||||
def embed_data(collection):
    """Load the JSON arrays found in ``data/`` and add their items to *collection*.

    Every ``*.json`` file directly inside the ``data`` directory (resolved
    against the current working directory) is parsed. When the top-level
    value is a list, each item becomes one document whose id is
    ``"<file name without extension>-<index in the list>"``. Files whose
    top-level value is not a list are skipped with a warning; files that
    fail to parse are logged and skipped, so one bad file does not prevent
    the others from being embedded.

    Args:
        collection: object exposing ``add(documents=..., ids=...)``; it is
            called at most once, with everything that was loaded.
            NOTE(review): presumably a chromadb collection, which expects
            each document to be a string -- items are passed through
            unchanged, so confirm the data files contain string arrays.

    Raises:
        OSError: if the ``data`` directory cannot be listed or a file
            cannot be opened (these are not caught here).
    """
    logging.info('embedding data...')
    documents = []
    ids = []
    # read all data from the data folder
    for filename in os.listdir('data'):
        if not filename.endswith('.json'):
            continue
        filepath = os.path.join('data', filename)
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(filepath, 'r', encoding='utf-8') as file:
            try:
                data = json.load(file)
                if isinstance(data, list):
                    # ids are derived from the file stem so they stay stable
                    # across runs as long as the file contents keep their order
                    stem = filename.rsplit('.', 1)[0]
                    for index, item in enumerate(data):
                        documents.append(item)
                        ids.append(f"{stem}-{index}")
                else:
                    logging.warning("The file %s is not a JSON array.", filename)
            except json.JSONDecodeError as e:
                logging.exception("Error decoding JSON from file %s: %s", filename, e)
            except Exception as e:
                logging.exception("An error occurred while processing file %s: %s", filename, e)

    if not documents:
        # Nothing usable was found. Skip the add() call: Chroma is expected to
        # reject empty id/document lists (confirm against the installed
        # version), and that error would otherwise abort startup.
        logging.warning('no documents found to embed')
        return

    # store the data in chroma for look-up
    collection.add(
        documents=documents,
        ids=ids,
    )
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
|
@ -174,11 +229,16 @@ def main():
|
|||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
|
||||
chroma = chromadb.Client()
|
||||
collection = chroma.get_or_create_collection(name='discollama')
|
||||
embed_data(collection)
|
||||
|
||||
Discollama(
|
||||
ollama.AsyncClient(host=f'{args.ollama_scheme}://{args.ollama_host}:{args.ollama_port}'),
|
||||
discord.Client(intents=intents),
|
||||
redis.Redis(host=args.redis_host, port=args.redis_port, db=0, decode_responses=True),
|
||||
model=args.ollama_model,
|
||||
collection=collection,
|
||||
).run(os.environ['DISCORD_TOKEN'])
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -10,6 +10,7 @@ python = "^3.11"
|
|||
discord-py = "^2.3.1"
|
||||
redis = "^5.0.1"
|
||||
ollama = "^0.1.0"
|
||||
chromadb = "^0.4.24"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
|
|||
Loading…
Reference in New Issue