10 changes: 7 additions & 3 deletions app/index.tsx
@@ -14,14 +14,15 @@ import { logChatCompletionDiagnostics } from '@/services/diagnostics';
import { MessageInput } from '@/components/ui/chat/MessageInput';
import { Model } from '@/services/models';
import { VoiceModeOverlay } from '@/components/VoiceModeScreen';
import { streamLlamaCompletion, generateUniqueId } from '@/services/chat/llama-local';
import { streamLlamaCompletion, generateUniqueId, loadConversationIntoContext, initializeConversationContext } from '@/services/chat/llama-local';


export default function ChatScreen() {
const [messages, setMessages] = useState<Message[]>([]);
const [currentAIMessage, setCurrentAIMessage] = useState<string>('');
const [isStreaming, setIsStreaming] = useState(false);
const [voiceMode, setVoiceMode] = useState(false);
const [isConversationLoaded, setIsConversationLoaded] = useState(false);

const scrollViewRef = useRef<any>(null);
const {
@@ -47,6 +48,7 @@ export default function ChatScreen() {
setMessages([]);
saveCurrentConversation([]);
}
setIsConversationLoaded(true);
};

loadConversationState();
@@ -126,7 +128,7 @@
}

const sendMessage = async (inputText: string) => {
if (!inputText.trim() || isStreaming) return;
if (!inputText.trim() || isStreaming || !isConversationLoaded) return;
if (!selectedModel) return;

const userMessage: Message = createUserMessage(inputText, selectedModel);
@@ -137,7 +139,8 @@

setIsStreaming(true);
try {
// await sendChatMessage(
// In v0.1.4 we still pass the full updated messages array (including the new user message);
// streamLlamaCompletion forwards only the latest entry to the context, which keeps its own history
await streamLlamaCompletion(
cactusContext.context,
updatedMessages,
@@ -147,6 +150,7 @@
true,
tokenGenerationLimit,
isReasoningEnabled,
voiceMode
);
} catch (error) {
console.error('Error in chat:', error);
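The hunk above imports loadConversationIntoContext and initializeConversationContext but only shows the new isConversationLoaded flag being set. A minimal sketch of how the loading effect could wire the two helpers in; the loadCurrentConversation loader and the exact branch structure are assumptions, not shown in this diff:

// Sketch only - assumes loadCurrentConversation() returns the persisted Message[] (or null)
const loadConversationState = async () => {
  if (!cactusContext.context) return;
  const savedMessages = await loadCurrentConversation(); // hypothetical loader
  if (savedMessages && savedMessages.length > 0) {
    setMessages(savedMessages);
    // Replay the saved history into the v0.1.4 context before any new completion
    await loadConversationIntoContext(cactusContext.context, savedMessages, voiceMode);
  } else {
    setMessages([]);
    saveCurrentConversation([]);
    // Fresh conversation: seed the context with just the system message
    await initializeConversationContext(cactusContext.context, voiceMode);
  }
  setIsConversationLoaded(true);
};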
8 changes: 5 additions & 3 deletions components/VoiceModeScreen.tsx
@@ -49,7 +49,7 @@ export const VoiceModeOverlay = ({
setMessages(updatedMessages);
await streamLlamaCompletion(
cactusContext.context,
updatedMessages,
updatedMessages, // Pass the full array; only the latest message is forwarded to the context
currentModel,
setAiMessageText,
(metrics, model, completeMessage) => {
@@ -62,7 +62,7 @@
true
);
}
}, [messages, selectedModelRef, tokenGenerationLimit])//, setMessages, setAiMessageText, setIsProcessing]);
}, [messages, selectedModelRef, tokenGenerationLimit, cactusContext.context])

// Called when speech recognition encounters an error
const onSpeechError = useCallback((e: any) => {
@@ -186,7 +186,9 @@ export const VoiceModeOverlay = ({
{isListening && <Text textAlign="center">Listening...</Text>}
</YStack>
<YStack position='absolute' bottom='10%' width='80%' gap="$2">
<Text fontSize={12} textAlign='center' color="$gray10">This voice experience is currently powered by Apple Speech and not by Cactus Voice</Text>
<Text fontSize={12} textAlign='center' color="$gray10">
This voice experience is currently powered by Apple Speech and not by Cactus Voice
</Text>
{errorMessage && <Text color="$red10">Error: {errorMessage}</Text>}
</YStack>
</YStack>
6 changes: 5 additions & 1 deletion components/ui/chat/MessageInput.tsx
@@ -85,7 +85,11 @@ function MessageInputComponent({ sendMessage, isStreaming, selectedModel, setVoi

const handlePause = () => {
console.log('pause!')
cactusContext.context?.stopCompletion();
// In v0.1.4, the context object may have different method signatures
// Make sure stopCompletion exists before calling it
if (cactusContext.context?.stopCompletion) {
cactusContext.context.stopCompletion();
}
}

return (
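The guarded call above could also be written with optional chaining on the method itself, which behaves the same but is shorter; a possible alternative, not part of this diff:

// Equivalent guard: optional chaining only invokes stopCompletion if it exists
cactusContext.context?.stopCompletion?.();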
2 changes: 2 additions & 0 deletions contexts/modelContext.tsx
@@ -133,6 +133,8 @@ export const ModelProvider = ({ children }: { children: React.ReactNode }) => {
n_ctx: 2048,
n_gpu_layers: gpuLayers
});
// In v0.1.4, context maintains its own message history
// Context starts fresh when initialized, so no need to explicitly rewind here
const endTime = performance.now();
logModelLoadDiagnostics({model: selectedModel.value, loadTime: endTime - startTime});
setCactusContext({
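The comment above relies on the v0.1.4 context starting empty after initLlama. If the provider itself were to seed the system prompt (rather than leaving that to the chat screen), the wiring could look like the sketch below; the modelPath variable and the voiceMode flag passed here are assumptions, not shown in this diff:

// Sketch only - seed the freshly created context with the system prompt
const context = await initLlama({
  model: modelPath,        // assumed local file path of the selected model
  n_ctx: 2048,
  n_gpu_layers: gpuLayers,
});
await initializeConversationContext(context, /* voiceMode */ false);
setCactusContext({ context });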
2 changes: 1 addition & 1 deletion package.json
@@ -32,7 +32,7 @@
"@tamagui/shorthands": "^1.125.33",
"@tamagui/themes": "^1.125.33",
"@tamagui/web": "^1.125.33",
"cactus-react-native": "^0.0.1",
"cactus-react-native": "^0.1.4",
"expo": "~52.0.43",
"expo-blur": "~14.0.3",
"expo-constants": "~17.0.8",
83 changes: 70 additions & 13 deletions services/chat/llama-local.ts
@@ -12,6 +12,65 @@ export interface ChatCompleteCallback {
(metrics: ModelMetrics, model: Model, completeMessage: string): void;
}

/**
* Initialize the conversation context with system message for v0.1.4
*/
export async function initializeConversationContext(
context: LlamaContext,
voiceMode?: boolean
): Promise<void> {
const systemMessage = {
role: 'system',
content: `You are Cactus, a very capable AI assistant running offline on a smartphone. ${voiceMode ? 'Keep your messages VERY short. One-two sentences max.' : ''}`
};

// Initialize context with system message
await context.completion({
messages: [systemMessage],
n_predict: 1, // Minimal prediction just to set the system message
stop: ['</s>']
});
}

/**
* Load existing conversation into context for v0.1.4
*/
export async function loadConversationIntoContext(
context: LlamaContext,
messages: Message[],
voiceMode?: boolean
): Promise<void> {
if (messages.length === 0) {
// If no messages, just initialize with system message
await initializeConversationContext(context, voiceMode);
return;
}

if (context.stopCompletion) await context.stopCompletion();
// Guard against a hanging rewind(): give it at most 3 seconds before giving up
await Promise.race([
  context.rewind(),
  new Promise((_, reject) => setTimeout(() => reject(new Error('context.rewind() timed out')), 3000))
]);

// Format all messages including system message
const systemMessage = {
role: 'system',
content: `You are Cactus, a very capable AI assistant running offline on a smartphone. ${voiceMode ? 'Keep your messages VERY short. One-two sentences max.' : ''}`
};

const formattedMessages = [
systemMessage,
...messages.map(msg => ({
role: msg.isUser ? 'user' : 'assistant',
content: msg.text
}))
];

// Load all messages in one call to restore conversation context
await context.completion({
messages: formattedMessages,
n_predict: 1, // Minimal prediction just to load the context
stop: ['</s>']
});
}

export async function streamLlamaCompletion(
context: LlamaContext | null,
messages: Message[],
@@ -21,7 +80,8 @@ export async function streamLlamaCompletion(
streaming: boolean = true,
maxTokens: number,
isReasoningEnabled: boolean,
voiceMode?: boolean
voiceMode?: boolean,
isFirstMessage?: boolean
) {
try {
console.log('Ensuring Llama context...', new Date().toISOString());
@@ -34,16 +94,13 @@
'<|im_end|>', '<|EOT|>', '<|END_OF_TURN_TOKEN|>',
'<|end_of_turn|>', '<|endoftext|>', '<end_of_turn>', '<|end_of_sentence|>'];

const formattedMessages = [
{
role: 'system',
content: `You are Cactus, a very capable AI assistant running offline on a smartphone. ${voiceMode ? 'Keep your messages VERY short. One-two sentences max.' : ''}`
},
...messages.map(msg => ({
role: msg.isUser ? 'user' : 'assistant',
content: msg.text
}))
];
// In v0.1.4, the context maintains its own message history
// So we only need to pass the latest message
const latestMessage = messages[messages.length - 1];
const formattedLatestMessage = {
role: latestMessage.isUser ? 'user' : 'assistant',
content: latestMessage.text
};

const startTime = performance.now();
let firstTokenTime: number | null = null;
@@ -58,7 +115,7 @@
if (streaming) {
const result = await context.completion(
{
messages: formattedMessages,
messages: [formattedLatestMessage], // Only pass the latest message
n_predict: maxTokens,
stop: stopWords,
},
@@ -82,7 +139,7 @@
onComplete(modelMetrics, model, responseText);
} else {
const result = await context.completion({
messages: formattedMessages,
messages: [formattedLatestMessage],
n_predict: 1024,
stop: stopWords,
});
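Taken together, the new helpers assume the v0.1.4 context is stateful: history is replayed once via loadConversationIntoContext (or seeded via initializeConversationContext), and every later turn forwards only the newest message. A minimal sketch of that interaction, assuming context.completion accepts the same { messages, n_predict, stop } shape used above and appends each exchange to the context's internal history:

// Sketch only - two turns against a stateful context
await initializeConversationContext(context);           // history: [system]
await context.completion({
  messages: [{ role: 'user', content: 'Hi, who are you?' }],
  n_predict: 256,
  stop: ['</s>'],
});                                                      // history: [system, user, assistant]
await context.completion({
  messages: [{ role: 'user', content: 'Summarize that in one sentence.' }],
  n_predict: 256,
  stop: ['</s>'],
});                                                      // follow-up resolves against the stored history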