{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "2ca5bbbf-142b-48ed-89f2-18c63eac1eb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "_ = load_dotenv(find_dotenv())\n",
    "openai_api_key = os.environ[\"OPENAI_API_KEY\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42f0647a-3384-4a8d-bc5a-446b58a9ec80",
   "metadata": {},
   "source": [
    "## Basic App: Question & Answering from a Document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "196e2414-39ff-4643-9c31-b130697e3074",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAI\n",
    "from langchain.callbacks.tracers import LangChainTracer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "b191eb7d-a9df-47a6-95ac-fdec0b0b1969",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI()\n",
    "tracer = LangChainTracer(project_name=\"SimpleRAG3\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "567ecd8a-34b9-4489-9b6d-7b257919737a",
   "metadata": {},
   "source": [
    "**Load the text file**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "e9801a4d-ecd7-460a-a4db-f0a1711163f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import TextLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "e01cbe49-38ce-40d2-8e01-01052483fbe5",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = TextLoader(\"data/be-good-and-how-not-to-die.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "afff80c1-1cd1-408e-ac28-2a8594af2de6",
   "metadata": {},
   "outputs": [],
   "source": [
    "document = loader.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f6ff8b1-c63c-48b4-acaf-b959b98e770d",
   "metadata": {},
   "source": [
    "**The document is loaded as a Python list with metadata**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "684f2ccb-9ec4-4384-b875-3f89608292f8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'list'>\n"
     ]
    }
   ],
   "source": [
    "print(type(document))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "3f712992-beac-420c-b94e-1db729bd9508",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "print(len(document))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "6cb6e6d1-765b-4762-8213-0c56e4829427",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source': 'data/be-good-and-how-not-to-die.txt'}\n"
     ]
    }
   ],
   "source": [
    "print(document[0].metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "eef34dd2-aace-45c9-a87b-c3967370f44f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You have 1 document.\n"
     ]
    }
   ],
   "source": [
    "print(f\"You have {len(document)} document.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "dfb653b8-8035-40bf-b73c-542c90499486",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Your document has 27419 characters\n"
     ]
    }
   ],
   "source": [
    "print(f\"Your document has {len(document[0].page_content)} characters\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d8fbef1-4447-4ef5-ba21-9211b4486904",
   "metadata": {},
   "source": [
    "**Split the document in small chunks**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "18899367-8f8e-4d47-a61c-8041bafb4cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.text_splitter import RecursiveCharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "65f31176-187b-4e0c-819a-dd35b7e61ee3",
   "metadata": {},
   "outputs": [],
   "source": [
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=3000,\n",
    "    chunk_overlap=400\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "0147e8c9-70fb-4990-8bf6-c77ea283200a",
   "metadata": {},
   "outputs": [],
   "source": [
    "document_chunks = text_splitter.split_documents(document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "86820a0a-e307-4bb9-9b69-24284d9f0754",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Now you have 12 chunks.\n"
     ]
    }
   ],
   "source": [
    "print(f\"Now you have {len(document_chunks)} chunks.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1756c471-5af1-4ba4-8f90-cfc70e897155",
   "metadata": {},
   "source": [
    "**Convert text chunks in numeric vectors (called \"embeddings\")**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "2e40b2df-53ad-4984-8972-cf7258b762fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings.openai import OpenAIEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "c1e7c4f6-2116-4ce1-8349-640fa6591970",
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "67881eb4-7418-4423-b4b9-b750e4a579cf",
   "metadata": {},
   "source": [
    "**Load the embeddings to a vector database**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "75acf1fc-25a4-437c-bd1b-7310a83bae9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.vectorstores import FAISS"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "014dc49b-498a-49b0-883c-f323769320b3",
   "metadata": {},
   "source": [
    "*Careful: the next operation is expensive in OpenAI*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "8a2e4dbc-af28-497f-877a-933347c747ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "stored_embeddings = FAISS.from_documents(document_chunks, embeddings)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40ae930c-5d99-45c5-8597-52457d22b6d1",
   "metadata": {},
   "source": [
    "**Create a Retrieval Question & Answering Chain**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "e30338bd-58ae-467d-a01e-8bef3ae435b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chains import RetrievalQA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "d9bed1ac-d148-4fb1-9382-bc58db111d73",
   "metadata": {},
   "outputs": [],
   "source": [
    "QA_chain = RetrievalQA.from_chain_type(\n",
    "    llm=llm,\n",
    "    chain_type=\"stuff\",\n",
    "    retriever=stored_embeddings.as_retriever()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71a96c6d-a473-4ecc-885d-55f1618487d4",
   "metadata": {},
   "source": [
    "**Now we have a Question & Answering APP**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "c432c922-984b-49b9-b3c1-4a6c5d2ccd00",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"\"\"\n",
    "Who is the author of this article?\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "7d407274-69c0-4d7f-8def-5e2275027a41",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' \\nThe author of this article is Paul Graham.'"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QA_chain.run(question, callbacks=[tracer])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "825ce125-42c2-49bd-806b-1c3400778ba3",
   "metadata": {},
   "source": [
    "## New way: with LCEL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "6adbc897-7d4f-4a81-a2bb-229489891849",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate\n",
    "\n",
    "template = \"\"\"Answer the question based only on the following context:\n",
    "{context}\n",
    "\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "f10dd0bf-2fab-483f-851e-324b5ede2045",
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = stored_embeddings.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "b1bf635b-99c7-4799-80a2-c882b4836718",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.schema.output_parser import StrOutputParser\n",
    "from langchain.schema.runnable import RunnablePassthrough"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "e0561e55-6dac-49c1-8373-6e0d705c3182",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = (\n",
    "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "b87314dc-415e-4a24-938a-61b051ee9d9a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nThe target audience of this article is aspiring entrepreneurs or startup founders who are looking for advice and guidance on how to succeed in the competitive startup world. It offers tips on how to make a successful product, maintain morale, and avoid common pitfalls.'"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.invoke(\"What is the target audience of this article? Describe it in less than 50 words.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cedeadb5-94bd-47bc-b1a9-2c06beb025d6",
   "metadata": {},
   "source": [
    "*See that the previous chain went to the default project since we did not set that differently*"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6b487c0-b742-45f5-872c-6675ceb3d68a",
   "metadata": {},
   "source": [
    "## Test Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "3be84b64-ba7e-4ef2-8929-aca8c42a7821",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv021524/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.chat_models.openai.ChatOpenAI` was deprecated in langchain-community 0.0.10 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n",
      "  warn_deprecated(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "View the evaluation results for project 'test-loyal-conference-76' at:\n",
      "https://smith.langchain.com/o/ec6a7494-139b-5170-8044-143bc78622a9/datasets/47d6584f-83a1-40d3-a816-79641cd89c26/compare?selectedSessions=9688ef0e-3e60-4b70-85b2-7b933ef639e4\n",
      "\n",
      "View all tests for Dataset simpleRagDataset at:\n",
      "https://smith.langchain.com/o/ec6a7494-139b-5170-8044-143bc78622a9/datasets/47d6584f-83a1-40d3-a816-79641cd89c26\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Chain failed for example 75b09c91-594c-42e1-8cd5-1f42b0e9df66 with inputs {'query': '\\nWhat is this article about? \\nDescribe it in less than 100 words.\\n'}\n",
      "Error Type: TypeError, Message: expected string or buffer\n",
      "Chain failed for example 3652ddeb-5cc9-457f-b70b-78dc4b720912 with inputs {'query': '\\nWho is the author of this article?\\n'}\n",
      "Error Type: TypeError, Message: expected string or buffer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[------------------------------------------------->] 2/2"
     ]
    }
   ],
   "source": [
    "import langsmith\n",
    "\n",
    "from langchain import chat_models, prompts, smith\n",
    "from langchain.schema import output_parser\n",
    "\n",
    "# Define the evaluators to apply\n",
    "eval_config = smith.RunEvalConfig(\n",
    "    evaluators=[\n",
    "        \"cot_qa\"\n",
    "    ],\n",
    "    custom_evaluators=[],\n",
    "    eval_llm=chat_models.ChatOpenAI(model=\"gpt-4\", temperature=0)\n",
    ")\n",
    "\n",
    "client = langsmith.Client()\n",
    "chain_results = client.run_on_dataset(\n",
    "    dataset_name=\"simpleRagDataset\",\n",
    "    llm_or_chain_factory=chain,\n",
    "    evaluation=eval_config,\n",
    "    project_name=\"test-loyal-conference-76\",\n",
    "    concurrency_level=5,\n",
    "    verbose=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95a2735a-1255-4e6d-a984-e81acf730ec5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
