# Input row: a wide column for the chat box and a narrow one for the optional attachment.
# NOTE(review): `submit` is passed as the on_change callback below, so it must already
# be defined when this line executes - confirm the definition order in the full script.
col1, col2 = st.columns([4, 1])
with col1:
    user_message = st.text_input('Type your message', key='user_input', on_change=submit)

with col2:
    uploaded = st.file_uploader(
        'Attach File',
        key='uploaded_file',
        type=['pdf', 'txt'],
        help='Optional: attach a PDF or text file to read',
    )
    # Confirm to the user once this file name has been recorded in session state under
    # 'uploaded_file_processed_for'. st.caption has to be *called*: assigning to it
    # replaces the function with a string and renders nothing.
    if uploaded and st.session_state.get('uploaded_file_processed_for') == uploaded.name:
        st.caption('File Ready')

def process_uploaded_file(uploaded_file):
    """
    Extract the text content of an uploaded PDF or TXT file.

    Args:
        uploaded_file: File-like upload object exposing ``name``, ``type``
            (a MIME type string) and ``read()``.

    Returns:
        dict: ``{"filename": ..., "content": ..., "size": ...}``.
            ``content`` is the extracted text, the literal
            "Unsupported file type" for any other MIME type, or an
            "Error reading file: ..." message when reading failed.
            ``size`` is ``len(content)`` on the normal path and 0 on error.
    """
    try:
        # A stream that has already been read (for example when the same
        # upload is processed for a second message) sits at EOF and would
        # produce empty text, so rewind it first when the object allows it.
        rewind = getattr(uploaded_file, "seek", None)
        if callable(rewind):
            rewind(0)

        if uploaded_file.type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        elif uploaded_file.type == "text/plain":
            text = uploaded_file.read().decode('utf-8')
        else:
            text = "Unsupported file type"

        return {
            "filename": uploaded_file.name,
            "content": text,
            "size": len(text)
        }
    except Exception as e:
        # Best-effort by design: failures are reported inside the result dict
        # rather than raised, so the caller always receives the same shape.
        return {
            "filename": uploaded_file.name if uploaded_file else "Unknown",
            "content": f"Error reading file: {str(e)}",
            "size": 0
        }

def extract_text_from_pdf(uploaded_file):
    """
    Extract text content from an uploaded PDF file.

    Args:
        uploaded_file: Streamlit UploadedFile object (any binary file-like
            object accepted by ``PyPDF2.PdfReader``).

    Returns:
        str: Text of all pages joined with newlines and stripped of
            leading/trailing whitespace. If anything raises, the string
            "Error extracting text from PDF: <reason>" is returned instead.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(uploaded_file)

        # A page with no extractable text (e.g. a scanned image) may give an
        # empty or None result depending on the PyPDF2 version; treat it as
        # empty so one such page cannot abort extraction of the whole file.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

        # Join once instead of repeated string concatenation; the final strip
        # makes this equal to appending "\n" after every page.
        return "\n".join(page_texts).strip()

    except Exception as e:
        # Errors are reported as text so callers always receive a string.
        return f"Error extracting text from PDF: {str(e)}"


def run_internet_agent(user_message, memory_context, file_data=None):
    """
    Answer a user message from an uploaded document or, failing that, via web search.

    When ``file_data`` carries non-empty ``"content"``, the document text
    (capped at 8000 characters) is embedded in the system prompt and the
    model is queried directly through ``chat``. Otherwise an agent equipped
    with ``DuckDuckGoSearchRun`` is built and streamed, and the last AI
    message that carries no tool calls is used as the answer.

    Args:
        user_message: The question typed by the user.
        memory_context: Text inserted into the system prompt under the
            memory-context heading.
        file_data: Optional dict with ``"filename"`` and ``"content"`` keys.

    Returns:
        str: The model's answer, a fallback sentence when the agent produced
            no final answer, or an "Error ..." message when the model or
            agent call raised.
    """
    document_mode = file_data and file_data.get("content")

    # ---- Web-search mode: no usable document was supplied ----
    if not document_mode:
        web_prompt = f"""
        You are a highly flexible and helpful AI research assistant. Your primary goal is to answer the user's question, using the available tools only when necessary.
        
        **MEMORY CONTEXT:**
        {memory_context}
        
        **FLEXIBLE FORMATTING RULES:**
        1.  **PRIORITY:** Adhere strictly to any formatting requested by the user (e.g., "return as bullet points," "write a short paragraph," or "output as JSON").
        2.  **LINK MANDATE (Hard Rule):** For every piece of factual information, movie, book, or specific data point you mention that came from the web search tool, you MUST embed the source URL from the search results directly into the text using **Markdown link syntax: [Relevant Text](Source URL)**.
        """

        tools = [DuckDuckGoSearchRun()]
        llm = ChatOllama(model="llama3.2", temperature=0.1)
        agent_executor = create_agent(llm, tools, system_prompt=web_prompt)

        answer = None
        try:
            stream = agent_executor.stream(
                {"messages": [HumanMessage(content=user_message)]},
                stream_mode="values",
            )
            for snapshot in stream:
                newest = snapshot["messages"][-1]
                if newest.type != "ai":
                    continue

                # An AI message that still requests tool calls is an
                # intermediate step, not the final answer.
                pending_calls = getattr(newest, "tool_calls", None)
                if pending_calls and len(pending_calls) > 0:
                    continue

                # Remember the candidate but keep consuming the stream so the
                # agent run completes.
                answer = newest.content

            if not answer:
                return "I couldn't find an answer. Please try rephrasing your question."
            return answer
        except Exception as e:
            return f"Error during search: {str(e)}"

    # ---- Document mode: answer from the uploaded file, no web search ----
    doc_name = file_data.get("filename", "uploaded document")
    doc_text = file_data.get("content", "")

    # Cap the prompt size while keeping a reasonable amount of context.
    char_limit = 8000
    if len(doc_text) > char_limit:
        doc_text = doc_text[:char_limit] + "\n\n[Document truncated for length...]"

    file_prompt = f"""You are a helpful AI assistant analyzing a document.

The user has uploaded a file named: "{doc_name}"

DOCUMENT CONTENT:
{doc_text}

MEMORY CONTEXT:
{memory_context}

IMPORTANT INSTRUCTIONS:
- The user is asking about THIS specific document they uploaded
- Answer questions directly about the content you see above
- Be specific and reference actual content from the document
- If asked "tell me about this document/file/policy", provide a comprehensive summary
- Do not say you don't have access to the file - you can see its content above
- Be clear, accurate, and helpful"""

    conversation = [
        {"role": "system", "content": file_prompt},
        {"role": "user", "content": user_message},
    ]
    try:
        reply = chat(model="llama3.2", messages=conversation)
        return reply["message"]["content"]
    except Exception as e:
        return f"Error analyzing file: {str(e)}"



def submit():
    """
    Handle a submitted chat message (used as the text input's on_change callback).

    Reads the message from ``st.session_state['user_input']`` and does nothing
    when it is empty. Otherwise it records the message in the session history
    and the Chroma collection, builds the memory context, resolves file data
    from the current upload or from the previously stored
    ``current_file_data``, runs the agent, stores the reply, and finally
    clears the input box.
    """
    state = st.session_state
    user_message = state['user_input']
    if not user_message:
        return

    # Keep the active profile in step with the current username.
    if 'active_profile' not in state or state.active_profile != username:
        state.active_profile = username

    # A named user gets a dedicated Chroma directory; otherwise use the default one.
    chroma_dir = f".chroma_db_{username}" if username else ".chroma_db"
    client, collection = init_chromadb(chroma_dir)

    state.history.append(("user", user_message))
    save_chat_message(client, collection, role="user", text=user_message, user_id=username)

    memory_context = get_memory_context(collection, user_message, user_id=username, history_limit=30)

    # Prefer the file currently in the uploader; otherwise fall back to the
    # data stored from an earlier upload, if any.
    attachment = state.get("uploaded_file", None)
    if attachment:
        file_data = process_uploaded_file(attachment)
        state["uploaded_file_processed_for"] = attachment.name
        state["current_file_data"] = file_data
    elif state.get("current_file_data"):
        file_data = state["current_file_data"]
    else:
        file_data = None

    result = run_internet_agent(user_message, memory_context, file_data)
    state.history.append(("agent", result))

    save_chat_message(client, collection, role="agent", text=result, user_id=username)

    # Stash the submitted text under 'empty_key', then blank the input widget.
    if 'empty_key' not in state:
        state.empty_key = ''
    state.empty_key = state.user_input
    state.user_input = ''