From c6e814daa80eec01c10f319ebebcb84c42cd26e1 Mon Sep 17 00:00:00 2001
From: KernelDeimos
Date: Wed, 11 Dec 2024 20:51:00 -0500
Subject: [PATCH] fix: add fallback moderation in case openai goes down

---
 .../src/modules/puterai/AIChatService.js      |  44 +++++--
 .../src/modules/puterai/lib/AsModeration.js   | 117 +++++++++++++++++++
 2 files changed, 154 insertions(+), 7 deletions(-)
 create mode 100644 src/backend/src/modules/puterai/lib/AsModeration.js

diff --git a/src/backend/src/modules/puterai/AIChatService.js b/src/backend/src/modules/puterai/AIChatService.js
index 0cca9f13..b036d7ec 100644
--- a/src/backend/src/modules/puterai/AIChatService.js
+++ b/src/backend/src/modules/puterai/AIChatService.js
@@ -6,6 +6,7 @@ const { DB_WRITE } = require("../../services/database/consts");
 const { TypeSpec } = require("../../services/drivers/meta/Construct");
 const { TypedValue } = require("../../services/drivers/meta/Runtime");
 const { Context } = require("../../util/context");
+const { AsModeration } = require("./lib/AsModeration");
 
 // Maximum number of fallback attempts when a model fails, including the first attempt
 const MAX_FALLBACKS = 3 + 1; // includes first attempt
@@ -489,11 +490,6 @@ class AIChatService extends BaseService {
      * Returns true if OpenAI service is unavailable or all messages pass moderation.
      */
     async moderate ({ messages }) {
-        const svc_openai = this.services.get('openai-completion');
-
-        // We can't use moderation of openai service isn't available
-        if ( ! svc_openai ) return true;
-
         for ( const msg of messages ) {
             const texts = [];
             if ( typeof msg.content === 'string' ) texts.push(msg.content);
@@ -508,8 +504,42 @@ class AIChatService extends BaseService {
 
             const fulltext = texts.join('\n');
 
-            const mod_result = await svc_openai.check_moderation(fulltext);
-            if ( mod_result.flagged ) return false;
+            // OpenAI is the primary moderator; Claude is only consulted
+            // when the OpenAI check could not be completed.
+            let mod_last_error = null;
+            try {
+                const svc_openai = this.services.get('openai-completion');
+                const mod_result = await svc_openai.check_moderation(fulltext);
+                if ( mod_result.flagged ) return false;
+                continue;
+            } catch (e) {
+                console.error(e);
+                mod_last_error = e;
+            }
+
+            try {
+                const svc_claude = this.services.get('claude');
+                const chat = svc_claude.as('puter-chat-completion');
+                const mod = new AsModeration({
+                    chat,
+                    model: 'claude-3-haiku-20240307',
+                });
+                if ( ! await mod.moderate(fulltext) ) {
+                    return false;
+                }
+                mod_last_error = null;
+            } catch (e) {
+                console.error(e);
+                mod_last_error = e;
+            }
+
+            if ( mod_last_error ) {
+                this.log.error('moderation error', {
+                    fulltext,
+                    mod_last_error,
+                });
+                throw new Error('no working moderation service', { cause: mod_last_error });
+            }
         }
         return true;
     }
diff --git a/src/backend/src/modules/puterai/lib/AsModeration.js b/src/backend/src/modules/puterai/lib/AsModeration.js
new file mode 100644
index 00000000..2f20c76d
--- /dev/null
+++ b/src/backend/src/modules/puterai/lib/AsModeration.js
@@ -0,0 +1,117 @@
+/**
+ * Adapts a chat-completion interface into a pass/fail content moderator.
+ */
+class AsModeration {
+    /**
+     * @param {object} options
+     * @param {object} options.chat - exposes an async
+     *   `complete({ messages, model })` method
+     * @param {string} [options.model] - model to request for the check
+     */
+    constructor ({ chat, model }) {
+        this.chat = chat;
+        this.model = model;
+    }
+
+    /**
+     * Asks the chat model whether `text` falls into an unsafe category.
+     *
+     * @param {string} text - message to assess
+     * @returns {Promise<boolean>} true only when the model explicitly
+     *   reports no violation; false otherwise (fail closed)
+     * @throws {Error} when the reply holds no parseable JSON object
+     */
+    async moderate (text) {
+        // copied from Anthropic documentation
+        // https://docs.anthropic.com/en/docs/about-claude/use-case-guides/content-moderation
+        const unsafe_categories = [
+            'Child Exploitation',
+            'Conspiracy Theories',
+            'Hate',
+            'Indiscriminate Weapons',
+            'Intellectual Property',
+            'Non-Violent Crimes',
+            'Privacy',
+            'Self-Harm',
+            'Sex Crimes',
+            'Sexual Content',
+            'Specialized Advice',
+            'Violent Crimes'
+        ];
+
+        const unsafe_category_str = unsafe_categories.join('\n');
+
+        // Escape every occurrence so the message cannot close the
+        // <message> tag and smuggle in its own instructions.
+        const escaped_text = String(text)
+            .replace(/&/g, '&amp;')
+            .replace(/</g, '&lt;')
+            .replace(/>/g, '&gt;');
+
+        // Built line by line so that indentation inside the user's
+        // message cannot change the layout of the prompt.
+        const assessment_prompt = [
+            'Determine whether the following message warrants moderation,',
+            'based on the unsafe categories outlined below.',
+            '',
+            'Message:',
+            `<message>${escaped_text}</message>`,
+            '',
+            'Unsafe Categories:',
+            '<categories>',
+            unsafe_category_str,
+            '</categories>',
+            '',
+            'Respond with ONLY a JSON object, using the format below:',
+            '{',
+            '    "violation": <Boolean field denoting whether the message should be moderated>,',
+            '    "categories": [Comma-separated list of violated categories],',
+            '    "explanation": [Optional. Only include if there is a violation.]',
+            '}',
+        ].join('\n');
+
+        const request = {
+            messages: [
+                {
+                    role: 'user',
+                    content: assessment_prompt,
+                },
+            ],
+        };
+        // Forward the configured model, if any, to the chat service.
+        if ( this.model ) request.model = this.model;
+
+        const result = await this.chat.complete(request);
+
+        // Accept the reply text either as a plain string or as the
+        // `text` of the first content part.
+        const text_of = content => typeof content === 'string'
+            ? content
+            : content?.[0]?.text;
+        const reply = text_of(result?.message?.content) ??
+            text_of(result?.messages?.[0]?.content);
+
+        // No usable reply: fail closed rather than let the message through.
+        if ( typeof reply !== 'string' ) return false;
+
+        // Tolerate prose or code fences around the JSON object.
+        const start = reply.indexOf('{');
+        const end = reply.lastIndexOf('}');
+        if ( start === -1 || end < start ) {
+            throw new Error('moderation reply did not contain a JSON object');
+        }
+
+        let parsed;
+        try {
+            parsed = JSON.parse(reply.slice(start, end + 1));
+        } catch (e) {
+            throw new Error('moderation reply was not valid JSON', { cause: e });
+        }
+
+        return parsed.violation === false;
+    }
+}
+
+module.exports = {
+    AsModeration,
+};