use ollama client

pull/8/merge
Michael Yang 2023-12-24 19:31:17 -08:00
parent 35c6f1a270
commit 5ba591fbfb
1 changed files with 145 additions and 125 deletions

View File

@ -1,160 +1,180 @@
import io
import os import os
import json import json
import aiohttp import asyncio
import discord
import argparse import argparse
from redis import Redis from datetime import datetime, timedelta
import logging import ollama
import discord
import redis
intents = discord.Intents.default() from logging import getLogger
intents.message_content = True
client = discord.Client(intents=intents) # piggy back on the logger discord.py set up
logging = getLogger('discord.discollama')
@client.event class Response:
async def on_ready(): def __init__(self, message):
self.message = message
self.channel = message.channel
self.r = None
self.sb = io.StringIO()
async def write(self, s, end=''):
    """Append *s* to the buffered reply and push the buffer to Discord.

    The text accumulates in ``self.sb``. While a reply message ``self.r``
    exists it is edited in place with the buffered text plus *end*.
    Otherwise a new message is sent (without *end*) and remembered as
    ``self.r``; if the current channel is a plain text channel, a thread
    named 'Discollama Says' is first created on the triggering message and
    becomes the channel used from then on.

    When the buffered text plus *s* and *end* would be longer than 2000
    characters, the buffer is emptied and the current reply is dropped, so
    *s* starts a fresh message.
    """
    buffered = self.sb.seek(0, io.SEEK_END)
    if buffered + len(s) + len(end) > 2000:
        # Start over: forget the current reply and empty the buffer.
        self.r = None
        self.sb.seek(0, io.SEEK_SET)
        self.sb.truncate()
    self.sb.write(s)

    if not self.r:
        if self.channel.type == discord.ChannelType.text:
            self.channel = await self.channel.create_thread(name='Discollama Says', message=self.message, auto_archive_duration=60)
        self.r = await self.channel.send(self.sb.getvalue())
        return

    await self.r.edit(content=self.sb.getvalue() + end)
class Discollama:
def __init__(self, ollama, discord, redis):
self.ollama = ollama
self.discord = discord
self.redis = redis
# register event handlers
self.discord.event(self.on_ready)
self.discord.event(self.on_message)
async def on_ready(self):
activity = discord.Activity(name='Discollama', state='Ask me anything!', type=discord.ActivityType.custom)
await self.discord.change_presence(activity=activity)
logging.info( logging.info(
'Ready! Invite URL: %s', 'Ready! Invite URL: %s',
discord.utils.oauth_url( discord.utils.oauth_url(
client.application_id, self.discord.application_id,
permissions=discord.Permissions(read_messages=True, send_messages=True), permissions=discord.Permissions(
read_messages=True,
send_messages=True,
create_public_threads=True,
),
scopes=['bot'], scopes=['bot'],
)) ),
async def generate_response(prompt, context=None):
    """Stream decoded JSON parts from the Ollama ``/api/generate`` endpoint.

    Args:
        prompt: prompt text to send.
        context: optional context from an earlier response. ``None`` (the
            default) or any other falsy value is left out of the request.

    Yields:
        One ``json.loads`` result per line of the HTTP response body.

    The model name, host and port are read from the module-level ``args``.
    """
    candidates = {
        'model': args.ollama_model,
        'prompt': prompt,
        'context': context,
    }
    # Only send fields that actually carry a value.
    body = {key: value for key, value in candidates.items() if value}

    url = f'http://{args.ollama_host}:{args.ollama_port}/api/generate'
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=body) as r:
            async for line in r.content:
                yield json.loads(line)
async def buffered_generate_response(prompt, context=None):
    """Group the parts streamed by ``generate_response`` into larger chunks.

    Args:
        prompt: prompt text forwarded to ``generate_response``.
        context: optional context forwarded to ``generate_response``;
            defaults to ``None`` rather than a shared mutable list.

    Yields:
        ``(buffer, part)`` tuples: the text accumulated since the previous
        yield and the part that triggered the yield. A tuple is produced
        once the buffer reaches ``args.buffer_size`` characters, and once
        more (with whatever text is left) for the part flagged ``done``.

    Raises:
        Exception: when a part carries a truthy ``'error'`` entry.
    """
    buffer = ''
    async for part in generate_response(prompt, context):
        if error := part.get('error'):
            raise Exception(error)

        if part['done']:
            # Flush the remaining text together with the final part.
            yield buffer, part
            break

        buffer += part['response']
        if len(buffer) >= args.buffer_size:
            yield buffer, part
            buffer = ''
def save_session(response, part):
    """Store the context of *part* under the id of *response* for one week.

    The context (an empty list when *part* has none) is written as a JSON
    document to the key ``ollama:<response.id>`` through the module-level
    ``redis`` client, and the key is given a seven-day expiry.
    """
    key = f'ollama:{response.id}'
    context = part.get('context', [])

    redis.json().set(key, '$', {'context': context})
    redis.expire(key, 60 * 60 * 24 * 7)

    logging.info('saving message=%s: len(context)=%d', response.id, len(context))
def load_session(reference):
kwargs = {}
if reference:
context = redis.json().get(f'ollama:{reference.message_id}', '.context')
kwargs['context'] = context or []
if kwargs.get('context'):
logging.info(
'loading message=%s: len(context)=%d',
reference.message_id,
len(kwargs['context']),
) )
return kwargs async def on_message(self, message):
if self.discord.user == message.author:
# don't respond to ourselves
@client.event
async def on_message(message):
if message.author == client.user:
return return
if client.user.id in message.raw_mentions: if not self.discord.user.mentioned_in(message):
raw_content = message.content.replace(f'<@{client.user.id}>', '').strip() # don't respond to messages that don't mention us
if raw_content.strip() == '': return
raw_content = 'Tell me about yourself.'
response = None content = message.content.replace(f'<@{self.discord.user.id}>', '').strip()
response_content = '' if not content:
async with message.channel.typing(): content = 'Hi!'
await message.add_reaction('🤔')
channel = message.channel
context = [] context = []
if reference := message.reference: if reference := message.reference:
if session := load_session(message.reference): context = await self.load(message_id=reference.message_id)
context = session.get('context', []) if not context:
else: reference_message = await message.channel.fetch_message(reference.message_id)
reference_message = await message.channel.fetch_message( content = '\n'.join(
reference.message_id) [
reference_content = reference_message.content content,
raw_content = '\n'.join([ 'Use this to answer the question if it is relevant, otherwise ignore it:',
raw_content, reference_message.content,
'Use it to answer the prompt:', ]
reference_content, )
])
async for buffer, part in buffered_generate_response( if not context:
raw_content, context = await self.load(channel_id=channel.id)
context=context,
):
response_content += buffer
if part['done']:
save_session(response, part)
break
if not response: r = Response(message)
response = await message.reply(response_content) task = asyncio.create_task(self.thinking(message))
await message.remove_reaction('🤔', client.user) async for part in self.generate(content, context):
continue task.cancel()
if len(response_content) + 3 >= 2000: await r.write(part['response'], end='...')
response = await response.reply(buffer)
response_content = buffer
continue
await response.edit(content=response_content + '...') await r.write('')
await self.save(r.channel.id, message.id, part['context'])
await response.edit(content=response_content) async def thinking(self, message, timeout=999):
try:
await message.add_reaction('🤔')
async with message.channel.typing():
await asyncio.sleep(timeout)
except Exception:
pass
finally:
await message.remove_reaction('🤔', self.discord.user)
async def generate(self, content, context, *, model='llama2'):
    """Stream a completion from the Ollama client in roughly one-second batches.

    Args:
        content: prompt text.
        context: context from an earlier exchange, passed through as-is.
        model: name of the model to query. Defaults to ``'llama2'``, the
            value that used to be hard-coded, so existing callers behave
            the same; callers may now pass the configured model instead.

    Yields:
        Parts received from the client whose ``'response'`` has been
        replaced by all text accumulated since the previous yield. A part
        is yielded when it is flagged ``'done'`` or when more than one
        second has passed since the last yield.
    """
    sb = io.StringIO()
    t = datetime.now()
    async for part in await self.ollama.generate(model=model, prompt=content, context=context, stream=True):
        sb.write(part['response'])

        if part['done'] or datetime.now() - t > timedelta(seconds=1):
            part['response'] = sb.getvalue()
            yield part

            # Restart the timer and empty the accumulator for the next batch.
            t = datetime.now()
            sb.seek(0, io.SEEK_SET)
            sb.truncate()
async def save(self, channel_id, message_id, ctx: list[int]):
    """Record *ctx* for a message and mark that message as the channel's latest.

    Two keys are written through ``self.redis``, each expiring after seven
    days: ``discollama:channel:<channel_id>`` holds *message_id*, and
    ``discollama:message:<message_id>`` holds *ctx* encoded as JSON.
    """
    one_week = 60 * 60 * 24 * 7

    channel_key = f'discollama:channel:{channel_id}'
    self.redis.set(channel_key, message_id, ex=one_week)

    message_key = f'discollama:message:{message_id}'
    self.redis.set(message_key, json.dumps(ctx), ex=one_week)
async def load(self, channel_id=None, message_id=None) -> list[int]:
    """Return the context stored for a message, or ``[]`` when there is none.

    Args:
        channel_id: when truthy, the message id is looked up from
            ``discollama:channel:<channel_id>`` and replaces *message_id*.
        message_id: id whose context is read from
            ``discollama:message:<message_id>``.

    Returns:
        The JSON-decoded context, or an empty list when no message id is
        known or nothing is stored for it.
    """
    if channel_id:
        message_id = self.redis.get(f'discollama:channel:{channel_id}')

    if message_id is None:
        # No message to resolve: skip the lookup instead of querying the
        # meaningless key 'discollama:message:None'.
        return []

    ctx = self.redis.get(f'discollama:message:{message_id}')
    return json.loads(ctx) if ctx else []
def run(self, token):
    """Run the Discord client with *token* and always release Redis afterwards.

    The Redis connection is closed whether the client returns normally or
    raises. Errors raised by the client are no longer swallowed: they
    propagate to the caller once the connection has been closed, so a
    failed start-up is visible instead of exiting silently.
    """
    try:
        self.discord.run(token)
    finally:
        self.redis.close()
default_ollama_host = os.getenv('OLLAMA_HOST', '127.0.0.1') def main():
default_ollama_port = os.getenv('OLLAMA_PORT', 11434)
default_ollama_model = os.getenv('OLLAMA_MODEL', 'llama2')
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--ollama-host', default=default_ollama_host)
parser.add_argument('--ollama-port', default=default_ollama_port, type=int)
parser.add_argument('--ollama-model', default=default_ollama_model, type=str)
parser.add_argument('--redis-host', default='localhost') parser.add_argument('--ollama-scheme', default=os.getenv('OLLAMA_SCHEME', 'http'), choices=['http', 'https'])
parser.add_argument('--redis-port', default=6379) parser.add_argument('--ollama-host', default=os.getenv('OLLAMA_HOST', '127.0.0.1'), type=str)
parser.add_argument('--ollama-port', default=os.getenv('OLLAMA_PORT', 11434), type=int)
parser.add_argument('--ollama-model', default=os.getenv('OLLAMA_MODEL', 'llama2'), type=str)
parser.add_argument('--redis-host', default=os.getenv('REDIS_HOST', '127.0.0.1'), type=str)
parser.add_argument('--redis-port', default=os.getenv('REDIS_PORT', 6379), type=int)
parser.add_argument('--buffer-size', default=32, type=int) parser.add_argument('--buffer-size', default=32, type=int)
args = parser.parse_args() args = parser.parse_args()
try: intents = discord.Intents.default()
redis = Redis(host=args.redis_host, port=args.redis_port) intents.message_content = True
client.run(os.getenv('DISCORD_TOKEN'), root_logger=True)
except KeyboardInterrupt:
pass
redis.close() Discollama(
ollama.AsyncClient(base_url=f'{args.ollama_scheme}://{args.ollama_host}:{args.ollama_port}'),
discord.Client(intents=intents),
redis.Redis(host=args.redis_host, port=args.redis_port, db=0, decode_responses=True),
).run(os.environ['DISCORD_TOKEN'])
if __name__ == '__main__':
main()