# Model served by Ollama; used by both the file-analysis and web-search paths.
_OLLAMA_MODEL = "llama3.2"
# Maximum number of document characters embedded in the file-analysis prompt.
_MAX_FILE_CONTEXT_CHARS = 8000


def process_uploaded_file(uploaded_file):
    """Extract the text content of an uploaded PDF or TXT file.

    Args:
        uploaded_file: File-like upload object exposing ``name``, ``type``,
            ``seek`` and ``read``.

    Returns:
        dict: ``{"filename", "content", "size"}``. On failure ``content``
        holds an error message and ``size`` is 0; no exception is raised.
    """
    try:
        # The same upload object can be processed on more than one submit;
        # rewind so a repeated read does not start at end-of-stream.
        uploaded_file.seek(0)

        if uploaded_file.type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        elif uploaded_file.type == "text/plain":
            # Tolerate stray non-UTF-8 bytes instead of losing the whole file.
            text = uploaded_file.read().decode('utf-8', errors='replace')
        else:
            text = "Unsupported file type"

        return {
            "filename": uploaded_file.name,
            "content": text,
            "size": len(text),
        }
    except Exception as e:
        # Best-effort boundary: report the failure through the result dict.
        return {
            "filename": uploaded_file.name if uploaded_file else "Unknown",
            "content": f"Error reading file: {str(e)}",
            "size": 0,
        }


def extract_text_from_pdf(uploaded_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        uploaded_file: File-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: Page texts joined by newlines and stripped of surrounding
        whitespace, or an error message if extraction fails.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # A page without extractable text may yield None; treat it as empty
        # so one blank page does not discard the rest of the document.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
        return "\n".join(page_texts).strip()
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"


def _answer_from_file(user_message, memory_context, file_data):
    """Answer ``user_message`` from the uploaded document, without web search."""
    filename = file_data.get("filename", "uploaded document")
    file_content = file_data.get("content", "")

    # Keep the prompt bounded while retaining a reasonable amount of context.
    if len(file_content) > _MAX_FILE_CONTEXT_CHARS:
        file_content = (
            file_content[:_MAX_FILE_CONTEXT_CHARS]
            + "\n\n[Document truncated for length...]"
        )

    SYSTEM_INSTRUCTION = f"""You are a helpful AI assistant analyzing a document.

The user has uploaded a file named: "{filename}"

DOCUMENT CONTENT:
{file_content}

MEMORY CONTEXT:
{memory_context}

IMPORTANT INSTRUCTIONS:
- The user is asking about THIS specific document they uploaded
- Answer questions directly about the content you see above
- Be specific and reference actual content from the document
- If asked "tell me about this document/file/policy", provide a comprehensive summary
- Do not say you don't have access to the file - you can see its content above
- Be clear, accurate, and helpful"""

    try:
        response = chat(
            model=_OLLAMA_MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_INSTRUCTION},
                {"role": "user", "content": user_message},
            ],
        )
        return response["message"]["content"]
    except Exception as e:
        return f"Error analyzing file: {str(e)}"


def _answer_from_web(user_message, memory_context):
    """Answer ``user_message`` with the search-enabled agent."""
    SYSTEM_INSTRUCTION = f"""
You are a highly flexible and helpful AI research assistant. Your primary goal is to answer the user's question, using the available tools only when necessary.

**MEMORY CONTEXT:**
{memory_context}

**FLEXIBLE FORMATTING RULES:**
1. **PRIORITY:** Adhere strictly to any formatting requested by the user (e.g., "return as bullet points," "write a short paragraph," or "output as JSON").
2. **LINK MANDATE (Hard Rule):** For every piece of factual information, movie, book, or specific data point you mention that came from the web search tool, you MUST embed the source URL from the search results directly into the text using **Markdown link syntax: [Relevant Text](Source URL)**.
"""

    search_tool = DuckDuckGoSearchRun()
    tools = [search_tool]
    llm = ChatOllama(
        model=_OLLAMA_MODEL,
        temperature=0.1,
    )
    agent_executor = create_agent(llm, tools, system_prompt=SYSTEM_INSTRUCTION)

    final_response = None
    try:
        for chunk in agent_executor.stream(
            {"messages": [HumanMessage(content=user_message)]},
            stream_mode="values",
        ):
            latest_message = chunk["messages"][-1]
            if latest_message.type != "ai":
                continue
            # An AI message that requests no tool calls is a candidate final
            # answer. Keep consuming the stream so the last one wins.
            if not getattr(latest_message, 'tool_calls', None):
                final_response = latest_message.content

        if final_response:
            return final_response
        return "I couldn't find an answer. Please try rephrasing your question."
    except Exception as e:
        return f"Error during search: {str(e)}"


def run_internet_agent(user_message, memory_context, file_data=None):
    """Answer the user's message from an uploaded file or via web search.

    Args:
        user_message: The text the user typed.
        memory_context: Context string embedded into the system prompt.
        file_data: Optional dict with ``filename`` and ``content`` keys, as
            returned by ``process_uploaded_file``.

    Returns:
        str: The model's answer, or an error / fallback message. When
        ``file_data`` carries non-empty ``content`` the answer is based on
        that document; otherwise the search-enabled agent is used.
    """
    if file_data and file_data.get("content"):
        return _answer_from_file(user_message, memory_context, file_data)
    return _answer_from_web(user_message, memory_context)


def submit():
    """Handle a submitted chat message (``on_change`` callback of the input).

    Stores the user message, obtains an answer from ``run_internet_agent``
    (using the attached file when one is available), stores the answer, and
    clears the input box. Does nothing when the input is empty.
    """
    user_message = st.session_state['user_input']
    if not user_message:
        return

    st.session_state.active_profile = username

    # Each profile gets its own Chroma directory; always initialise so that
    # client/collection are bound on every call.
    chroma_dir = f".chroma_db_{username}" if username else ".chroma_db"
    client, collection = init_chromadb(chroma_dir)

    st.session_state.history.append(("user", user_message))
    save_chat_message(client, collection, role="user", text=user_message, user_id=username)

    memory_context = get_memory_context(collection, user_message, user_id=username, history_limit=30)

    file_data = None
    uploaded = st.session_state.get("uploaded_file", None)
    if uploaded:
        file_data = process_uploaded_file(uploaded)
        st.session_state["uploaded_file_processed_for"] = uploaded.name
        # Remember the processed file so later messages can still refer to it.
        st.session_state["current_file_data"] = file_data
    elif st.session_state.get("current_file_data"):
        file_data = st.session_state["current_file_data"]

    result = run_internet_agent(user_message, memory_context, file_data)

    st.session_state.history.append(("agent", result))
    save_chat_message(client, collection, role="agent", text=result, user_id=username)

    # Keep a copy of the submitted text, then clear the input widget.
    st.session_state.empty_key = st.session_state.user_input
    st.session_state.user_input = ''


# Input row: message box on the left, file attachment on the right.
# Placed after the definitions so that `submit` exists when it is referenced.
col1, col2 = st.columns([4, 1])
with col1:
    user_message = st.text_input('Type your message', key='user_input', on_change=submit)
with col2:
    uploaded = st.file_uploader(
        'Attach File',
        key='uploaded_file',
        type=['pdf', 'txt'],
        help='Optional: attach a PDF or text file to read',
    )
    if uploaded and st.session_state.get('uploaded_file_processed_for', None) == uploaded.name:
        st.caption('File Ready')