diff --git a/nanobot/agent/memory_mem0.py b/nanobot/agent/memory_mem0.py
index 19741af..ac1db29 100644
--- a/nanobot/agent/memory_mem0.py
+++ b/nanobot/agent/memory_mem0.py
@@ -45,100 +45,8 @@ class Mem0MemoryStore:
         # Build custom extraction prompt tuned for nanobot conversations
         from datetime import datetime
 
-        custom_prompt = f"""# Nanobot Fact Extraction Prompt
-# Version: 1.0
-# Date: {datetime.now().strftime("%Y-%m-%d")}
-
-You are an information organizer for a personal AI assistant. Extract memorable facts from conversations between a user and their AI assistant.
-
-## Context
-Unlike consumer chatbots where users share personal details, this assistant is used for research, debugging, and task execution. Extract facts from BOTH user messages (what they care about / asked for) AND assistant messages (what was found / accomplished).
-
-## What to Extract
-1. **User interests and focus areas**: Topics the user asked to research or investigate
-2. **Research findings**: Key facts, comparisons, or conclusions the assistant surfaced
-3. **Technical work**: Systems debugged, problems solved, tools built or configured
-4. **User preferences revealed through requests**: (e.g., asking for smart ring research implies interest in wearables)
-5. **Project context**: What the user is working on, building, or planning
-6. **Tools and tech stack**: Technologies, services, or platforms mentioned or used
-
-## What NOT to Extract
-- Command invocations or shell output
-- JSON/code blobs or raw data
-- Routine assistant acknowledgments ("Let me search for that", "I'll help you with that")
-- Timestamps, file paths, or technical identifiers unless specifically significant
-- Messages that are just "/new" or other system commands
-- Tool use metadata or Python list notation artifacts
-
-## Examples
-
-### Example 1: Research Query with Findings
-Input:
-user: research smart rings that can help with ADHD focus tracking
-assistant: I'll search for that. The top options are Oura Ring (best sleep data for ADHD correlation), Ultrahuman Ring AIR (real-time metabolic tracking), and RingConn (continuous SpO2). For ADHD specifically, Oura's HRV and sleep stage data correlate with focus capacity.
-
-Output: {{"facts": [
-  "User researching smart rings for ADHD focus tracking",
-  "Oura Ring: best sleep stage data for ADHD (HRV correlates with focus)",
-  "Ultrahuman Ring AIR: real-time metabolic tracking",
-  "RingConn: continuous SpO2 monitoring"
-]}}
-
-### Example 2: Debugging/Technical Work
-Input:
-user: debug why mem0 is not extracting facts
-assistant: Found the issue: mem0's default prompt only extracts from user messages, but nanobot users send commands not personal info. Also assistant content with tool_use arrives as a Python list which gets corrupted.
-
-Output: {{"facts": [
-  "Debugged mem0 fact extraction issue",
-  "Root cause: default prompt ignores assistant messages",
-  "Assistant content corruption: list format not handled by parse_messages()"
-]}}
-
-### Example 3: Geopolitical Research
-Input:
-user: look at ukraine geopolitics situation
-assistant: Key developments: NATO eastern flank reinforcement accelerated, grain deal negotiations stalled, EU economic impact is asymmetric with Eastern European members bearing higher costs.
-
-Output: {{"facts": [
-  "User researched Ukraine geopolitics",
-  "NATO eastern flank reinforcement accelerated (2026)",
-  "Ukraine grain deal negotiations stalled",
-  "EU economic impact from Ukraine conflict is asymmetric, Eastern Europe most affected"
-]}}
-
-### Example 4: Skip - Just Tool Output
-Input:
-assistant: [{{'type': 'tool_use', 'id': 'tu_1', 'name': 'bash', ...}}]
-tool: $ ls -la\\ntotal 48\\ndrwxr-xr-x 12 user staff...
-
-Output: {{"facts": []}}
-
-### Example 5: Skip - System Commands
-Input:
-user: /new
-
-Output: {{"facts": []}}
-
-### Example 6: Skip - No Meaningful Content
-Input:
-assistant: Let me help you with that.
-user: ok
-
-Output: {{"facts": []}}
-
-## Instructions
-- Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-- Extract from BOTH user and assistant messages.
-- Prefer specific, searchable facts over vague summaries.
-- Combine related user question + assistant answer into unified facts when possible.
-- For transient/time-sensitive facts (location, health data, weather, notifications), ALWAYS include the date or time. Write "On 2026-03-01, Makar was in Barcelona" NOT "Makar is in Barcelona".
-- Never phrase facts as present-tense universal truths when they are time-bound observations.
-- Return empty list if the conversation contains only commands, tool output, or no meaningful substance.
-- Respond only with the JSON object: {{"facts": ["fact1", "fact2", ...]}}, no other text.
-
-Here is the conversation to extract facts from:
-"""
+        today = datetime.now().strftime("%Y-%m-%d")
+        custom_prompt = f"Extract dated facts from this conversation as JSON: {{\"facts\": [...]}}. Today is {today}.\n\n"
 
         # Initialize mem0 with optional config + custom prompt
         # Extract only MemoryConfig-relevant fields
@@ -150,6 +58,7 @@ Here is the conversation to extract facts from:
                 mem0_cfg_dict[key] = raw_config[key]
         logger.debug(f"Extracted for MemoryConfig: {list(mem0_cfg_dict.keys())}")
         logger.debug(f"Custom prompt length: {len(custom_prompt)} chars")
+        self.custom_prompt = custom_prompt
         mem0_cfg_dict["custom_fact_extraction_prompt"] = custom_prompt
         mem0_config = MemoryConfig(**mem0_cfg_dict)
         logger.debug(f"MemoryConfig created: vector_store={mem0_config.vector_store.provider if mem0_config.vector_store else None}")
@@ -243,6 +152,96 @@ Here is the conversation to extract facts from:
         except Exception as e:
             logger.error(f"Mem0 add failed: {e}")
 
+    async def extract_facts(
+        self,
+        messages: list[dict[str, Any]],
+        provider: Any,
+        model: str,
+    ) -> list[str]:
+        """
+        Extract facts from conversation using the main agent's LLM provider.
+
+        Uses the same provider/model already running (e.g. Haiku via Claude Max),
+        avoiding a separate LLM call to mem0's default GPT-nano.
+        """
+        import json as _json
+
+        # Build conversation text for extraction
+        conv_text = ""
+        for msg in messages:
+            role = msg.get("role", "unknown")
+            content = msg.get("content", "")
+            if isinstance(content, str) and content.strip():
+                conv_text += f"{role}: {content}\n\n"
+
+        if not conv_text.strip():
+            return []
+
+        extraction_messages = [
+            {"role": "user", "content": self.custom_prompt + conv_text}
+        ]
+
+        try:
+            response = await provider.chat(
+                messages=extraction_messages,
+                model=model,
+                max_tokens=2000,
+                temperature=0.3,
+            )
+
+            # Parse the JSON response — LLMResponse.content is a string
+            text = response.content or ""
+            # Strip markdown code fences if present
+            text = text.strip()
+            if text.startswith("```"):
+                text = text.split("\n", 1)[1] if "\n" in text else text[3:]
+            if text.endswith("```"):
+                text = text[:-3]
+            text = text.strip()
+
+            data = _json.loads(text)
+            facts = data.get("facts", [])
+            logger.debug(f"Extracted {len(facts)} facts using {model}")
+            return facts
+
+        except Exception as e:
+            logger.error(f"Fact extraction failed: {e}")
+            return []
+
+    def store_facts(
+        self,
+        facts: list[str],
+        user_id: str,
+        session_id: str | None = None,
+    ) -> None:
+        """
+        Store pre-extracted facts in mem0 with infer=False.
+
+        Bypasses mem0's built-in LLM extraction — facts are already
+        in final form from extract_facts().
+        """
+        if not facts:
+            return
+
+        metadata = {}
+        if session_id:
+            metadata["session_id"] = session_id
+
+        stored = 0
+        for fact in facts:
+            try:
+                self.memory.add(
+                    fact,
+                    user_id=user_id,
+                    infer=False,
+                    metadata=metadata if metadata else None,
+                )
+                stored += 1
+            except Exception as e:
+                logger.error(f"Failed to store fact '{fact[:50]}...': {e}")
+
+        logger.info(f"Stored {stored}/{len(facts)} facts for user {user_id}")
+
     def get_memory_context(
         self,
         query: str,
@@ -312,8 +311,7 @@ Here is the conversation to extract facts from:
         """
         Consolidate session messages into mem0 memory.
 
-        Unlike the original MemoryStore, mem0 handles extraction automatically,
-        so this just needs to feed recent messages to mem0.
+        Facts are extracted using the main agent's LLM provider, then stored with infer=False.
 
         Returns True on success.
         """
@@ -400,19 +398,10 @@ Here is the conversation to extract facts from:
                 })
 
             if mem0_messages:
-                # Debug: log what we're sending to mem0
-                import json
-                logger.debug(f"Mem0 consolidation sending {len(mem0_messages)} messages:")
-                for i, msg in enumerate(mem0_messages[:5]):  # Log first 5
-                    preview = msg['content'][:200] if len(msg['content']) > 200 else msg['content']
-                    logger.debug(f"  [{i}] {msg['role']}: {preview}")
-
-                # Add to mem0 - it handles extraction automatically
-                self.add_conversation(
-                    mem0_messages,
-                    user_id=user_id,
-                    session_id=session.key
-                )
+                # Extract facts using the main agent's LLM (already paid for),
+                # then store with infer=False to bypass mem0's GPT-nano
+                facts = await self.extract_facts(mem0_messages, provider, model)
+                self.store_facts(facts, user_id=user_id, session_id=session.key)
 
             # Update consolidation marker
             if archive_all:
diff --git a/nanobot/agent/tools/anthropic/bash.py b/nanobot/agent/tools/anthropic/bash.py
index 1bc2cb1..df469f7 100644
--- a/nanobot/agent/tools/anthropic/bash.py
+++ b/nanobot/agent/tools/anthropic/bash.py
@@ -71,9 +71,10 @@ class _BashSession:
         assert self._process.stdout
         assert self._process.stderr
 
-        # Send command + sentinel
+        # Send command + sentinel on its own line so heredoc terminators
+        # aren't corrupted (EOF; echo '...' ≠ EOF)
         self._process.stdin.write(
-            command.encode() + f"; echo '{self._sentinel}'\n".encode()
+            command.encode() + f"\necho '{self._sentinel}'\n".encode()
         )
         await self._process.stdin.drain()
 
diff --git a/tests/test_bash_heredoc.py b/tests/test_bash_heredoc.py
new file mode 100644
index 0000000..1d4a658
--- /dev/null
+++ b/tests/test_bash_heredoc.py
@@ -0,0 +1,68 @@
+"""Test that bash tool handles heredoc commands correctly.
+
+Reproduces the bug where `; echo '<<exit>>'` appended on the same line
+as a heredoc terminator prevents bash from recognizing the terminator,
+causing the session to hang forever.
+"""
+
+import asyncio
+import pytest
+from nanobot.agent.tools.anthropic.bash import BashTool20250124
+
+
+@pytest.mark.asyncio
+async def test_heredoc_command():
+    """Heredoc commands must complete without hanging."""
+    tool = BashTool20250124()
+
+    # Simple command works
+    result = await tool(command="echo hello")
+    assert result.output == "hello"
+
+    # Heredoc command — this is the exact pattern that caused the hang
+    result = await asyncio.wait_for(
+        tool(command="cat << 'EOF'\nline1\nline2\nEOF"),
+        timeout=5.0,
+    )
+    assert "line1" in result.output
+    assert "line2" in result.output
+
+
+@pytest.mark.asyncio
+async def test_heredoc_append_to_file():
+    """Heredoc append (the exact pattern the LLM uses) must work."""
+    tool = BashTool20250124()
+
+    result = await asyncio.wait_for(
+        tool(command="cat >> /tmp/test_heredoc_bash.txt << 'EOF'\nhello world\nEOF"),
+        timeout=5.0,
+    )
+    # Should complete without error
+    assert result.error is None or result.error == ""
+
+    # Verify the file was written
+    result2 = await tool(command="cat /tmp/test_heredoc_bash.txt")
+    assert "hello world" in result2.output
+
+    # Cleanup
+    await tool(command="rm -f /tmp/test_heredoc_bash.txt")
+
+
+@pytest.mark.asyncio
+async def test_regular_commands_still_work():
+    """Ensure regular commands still work after the fix."""
+    tool = BashTool20250124()
+
+    # Semicolons in commands
+    result = await tool(command="echo a; echo b")
+    assert "a" in result.output
+    assert "b" in result.output
+
+    # Multiline script
+    result = await tool(command="for i in 1 2 3; do echo $i; done")
+    assert "1" in result.output
+    assert "3" in result.output
+
+    # Command with exit code
+    result = await tool(command="true")
+    assert result.output == "(no output)" or result.output is not None
diff --git a/tests/test_bash_tool.py b/tests/test_bash_tool.py
index 64baeea..ebfe159 100644
--- a/tests/test_bash_tool.py
+++ b/tests/test_bash_tool.py
@@ -40,7 +40,7 @@ async def test_bash_tool_restart():
 
     # Restart
     result = await tool(restart=True)
-    assert "restarted" in result.output.lower()
+    assert "restarted" in (result.system or result.output or "").lower()
 
     # Variable should be gone
     result2 = await tool(command="echo $TEST_VAR")