diff --git a/src/backend/src/modules/puterai/AIChatService.js b/src/backend/src/modules/puterai/AIChatService.js
index 0cca9f13..b036d7ec 100644
--- a/src/backend/src/modules/puterai/AIChatService.js
+++ b/src/backend/src/modules/puterai/AIChatService.js
@@ -6,6 +6,7 @@ const { DB_WRITE } = require("../../services/database/consts");
const { TypeSpec } = require("../../services/drivers/meta/Construct");
const { TypedValue } = require("../../services/drivers/meta/Runtime");
const { Context } = require("../../util/context");
+const { AsModeration } = require("./lib/AsModeration");
// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1; // includes first attempt
@@ -489,11 +490,6 @@ class AIChatService extends BaseService {
- * Returns true if OpenAI service is unavailable or all messages pass moderation.
+ * Returns true if all messages pass moderation and false if any message is
+ * flagged; throws if no moderation service is available.
*/
async moderate ({ messages }) {
- const svc_openai = this.services.get('openai-completion');
-
- // We can't use moderation of openai service isn't available
- if ( ! svc_openai ) return true;
-
for ( const msg of messages ) {
const texts = [];
if ( typeof msg.content === 'string' ) texts.push(msg.content);
@@ -508,8 +504,39 @@ class AIChatService extends BaseService {
const fulltext = texts.join('\n');
- const mod_result = await svc_openai.check_moderation(fulltext);
- if ( mod_result.flagged ) return false;
+ let mod_last_error = null;
+ let mod_result = null;
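+ // Try OpenAI's moderation endpoint first; any failure here (including the
+ // service being unavailable) falls through to the Claude-based check below.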
+ try {
+ const svc_openai = this.services.get('openai-completion');
+ mod_result = await svc_openai.check_moderation(fulltext);
+ if ( mod_result.flagged ) return false;
+ } catch (e) {
+ console.error(e);
+ mod_last_error = e;
+ }
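+ // Fall back to using Claude as a moderation model via the AsModeration wrapper.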
+ try {
+ const svc_claude = this.services.get('claude');
+ const chat = svc_claude.as('puter-chat-completion');
+ const mod = new AsModeration({
+ chat,
+ model: 'claude-3-haiku-20240307',
+ });
+ if ( ! await mod.moderate(fulltext) ) {
+ return false;
+ }
+ mod_last_error = null;
+ } catch (e) {
+ console.error(e);
+ mod_last_error = e;
+ }
+
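+ // Neither provider produced a verdict: fail loudly rather than letting the
+ // message through unmoderated.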
+ if ( mod_last_error ) {
+ this.log.error('moderation error', {
+ fulltext,
+ mod_last_error,
+ });
+ throw new Error('no working moderation service');
+ }
}
return true;
}
diff --git a/src/backend/src/modules/puterai/lib/AsModeration.js b/src/backend/src/modules/puterai/lib/AsModeration.js
new file mode 100644
index 00000000..2f20c76d
--- /dev/null
+++ b/src/backend/src/modules/puterai/lib/AsModeration.js
@@ -0,0 +1,77 @@
+const { default: dedent } = require("dedent");
+
+class AsModeration {
+ constructor ({ chat, model }) {
+ this.chat = chat;
+ this.model = model;
+ }
+
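+ // Returns true when the text is considered safe, false when the model
+ // reports a violation of one of the unsafe categories below.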
+ async moderate (text) {
+ // copied from Anthropic documentation
+ // https://docs.anthropic.com/en/docs/about-claude/use-case-guides/content-moderation
+ const unsafe_categories = [
+ 'Child Exploitation',
+ 'Conspiracy Theories',
+ 'Hate',
+ 'Indiscriminate Weapons',
+ 'Intellectual Property',
+ 'Non-Violent Crimes',
+ 'Privacy',
+ 'Self-Harm',
+ 'Sex Crimes',
+ 'Sexual Content',
+ 'Specialized Advice',
+ 'Violent Crimes'
+ ];
+
+ const unsafe_category_str = unsafe_categories.join('\n');
+
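+ // Ask the model for a structured verdict: the user text is wrapped in
+ // <message> tags (with angle brackets escaped) and the reply is expected
+ // to be a bare JSON object with a boolean "violation" field.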
+ const assessment_prompt = dedent(`
+ Determine whether the following message warrants moderation,
+ based on the unsafe categories outlined below.
+
+ Message:
+ <message>${text
+     .replace(/</g, '&lt;')
+     .replace(/>/g, '&gt;')
+ }</message>
+
+ Unsafe Categories:
+ <categories>
+ ${unsafe_category_str}
+ </categories>
+
+ Respond with ONLY a JSON object, using the format below:
+ {
+     "violation": <Boolean field denoting whether the message should be moderated>,
+     "categories": [Comma-separated list of violated categories],
+     "explanation": [Optional. Only include if there is a violation.]
+ }
+ `);
+
+ const result = await this.chat.complete({
+ messages: [
+ {
+ role: 'user',
+ content: assessment_prompt,
+ }
+ ]
+ });
+
+
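+ // Different drivers may return the reply under `message` or `messages`;
+ // if no text can be extracted at all, default to a violation (fail closed).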
+ const str = result.message?.content?.[0]?.text ??
+ result.messages?.[0]?.content?.[0]?.text ??
+ '{ "violation": true }';
+
+ // JSON.parse throws on a malformed reply; the caller in AIChatService
+ // catches that and records it as a moderation failure.
+ const parsed = JSON.parse(str);
+ return ! parsed.violation;
+ }
+}
+
+module.exports = {
+ AsModeration,
+};