fix: add fallback moderation in case openai goes down

This commit is contained in:
KernelDeimos 2024-12-11 20:51:00 -05:00
parent 97a1616305
commit c6e814daa8
2 changed files with 111 additions and 7 deletions

View File

@ -6,6 +6,7 @@ const { DB_WRITE } = require("../../services/database/consts");
const { TypeSpec } = require("../../services/drivers/meta/Construct");
const { TypedValue } = require("../../services/drivers/meta/Runtime");
const { Context } = require("../../util/context");
const { AsModeration } = require("./lib/AsModeration");
// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1; // includes first attempt
@ -489,11 +490,6 @@ class AIChatService extends BaseService {
* Returns true when every message passes moderation and false when any
* content is flagged. Throws when neither the OpenAI moderation service
* nor the Claude-based fallback is available/working.
*/
// Moderates chat messages: collects each message's text, runs it through
// OpenAI moderation and a Claude-based fallback (AsModeration), and
// returns false on any flag. Throws when no moderation service works.
// NOTE(review): this span is rendered from a diff; removed (pre-change)
// lines appear interleaved with the added ones — see notes below.
async moderate ({ messages }) {
// NOTE(review): the next three lines look like the pre-change early-return
// this commit removes; the try/catch fallback logic below replaces them.
const svc_openai = this.services.get('openai-completion');
// We can't use moderation if openai service isn't available
if ( ! svc_openai ) return true;
for ( const msg of messages ) {
// Gather every text fragment of this message into one string.
const texts = [];
if ( typeof msg.content === 'string' ) texts.push(msg.content);
@ -508,8 +504,39 @@ class AIChatService extends BaseService {
const fulltext = texts.join('\n');
// NOTE(review): the next two lines appear to be pre-change residue of the
// unconditional OpenAI moderation call that the commit replaces.
const mod_result = await svc_openai.check_moderation(fulltext);
if ( mod_result.flagged ) return false;
// Track the most recent service failure so "all services errored" can be
// told apart from "content passed moderation".
let mod_last_error = null;
let mod_result = null;
try {
// Primary: OpenAI's moderation endpoint.
const svc_openai = this.services.get('openai-completion');
mod_result = await svc_openai.check_moderation(fulltext);
if ( mod_result.flagged ) return false;
} catch (e) {
console.error(e);
mod_last_error = e;
}
try {
// Fallback: Claude via the puter-chat-completion driver interface.
// NOTE(review): this runs even when the OpenAI check succeeded without
// flagging — both moderators are consulted; confirm that is intended.
const svc_claude = this.services.get('claude');
const chat = svc_claude.as('puter-chat-completion');
const mod = new AsModeration({
chat,
model: 'claude-3-haiku-20240307',
})
if ( ! await mod.moderate(fulltext) ) {
return false;
}
// A successful fallback clears any earlier OpenAI failure.
mod_last_error = null;
} catch (e) {
console.error(e);
mod_last_error = e;
}
if ( mod_last_error ) {
// Every moderation service errored: refuse the request (fail closed)
// rather than letting unmoderated content through.
this.log.error('moderation error', {
fulltext,
mod_last_error,
});
throw new Error('no working moderation service');
}
}
return true;
}

View File

@ -0,0 +1,77 @@
const { default: dedent } = require("dedent");
class AsModeration {
constructor ({ chat, model }) {
this.chat = chat;
this.model = model;
}
async moderate (text) {
// copied from Anthropic documentation
// https://docs.anthropic.com/en/docs/about-claude/use-case-guides/content-moderation
const unsafe_categories = [
'Child Exploitation',
'Conspiracy Theories',
'Hate',
'Indiscriminate Weapons',
'Intellectual Property',
'Non-Violent Crimes',
'Privacy',
'Self-Harm',
'Sex Crimes',
'Sexual Content',
'Specialized Advice',
'Violent Crimes'
];
const unsafe_category_str = unsafe_categories.join('\n');
const assessment_prompt = dedent(`
Determine whether the following message warrants moderation,
based on the unsafe categories outlined below.
Message:
<message>
${text
.replace('<', '&lt;')
.replace('>', '&gt;')
}
</message>
Unsafe Categories:
<categories>
${unsafe_category_str}
</categories>
Respond with ONLY a JSON object, using the format below:
{{
"violation": <Boolean field denoting whether the message should be moderated>,
"categories": [Comma-separated list of violated categories],
"explanation": [Optional. Only include if there is a violation.]
}}
`);
const result = await this.chat.complete({
messages: [
{
role: 'user',
content: assessment_prompt,
}
]
});
console.log('result???', require('util').inspect(result, { depth: null }));
const str = result.message?.content?.[0]?.text ??
result.messages?.[0]?.content?.[0]?.text ??
'{ "violation": true }';
const parsed = JSON.parse(str);
console.log('parsed?', parsed);
return ! parsed.violation;
}
}
// Export the Claude-backed moderation adapter (consumed by AIChatService
// as a fallback when OpenAI moderation is unavailable).
module.exports = {
AsModeration,
};