from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=20,  # number of tokens overlap between chunks
    length_function=tiktoken_len,
    separators=['\n\n', '\n', ' ', '']
)
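The length_function above points at a tiktoken_len helper defined in an earlier cell; a minimal sketch of such a token counter, assuming the tiktoken package and the cl100k_base encoding used by gpt-3.5-turbo:

import tiktoken

# encoding matching gpt-3.5-turbo (assumption)
tokenizer = tiktoken.get_encoding('cl100k_base')

def tiktoken_len(text):
    # measure chunk length in tokens rather than characters
    tokens = tokenizer.encode(text, disallowed_special=())
    return len(tokens)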
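docs, used in the next cells, holds the pages scraped from the LangChain documentation; a minimal sketch of how it could be loaded, assuming the HTML was mirrored into a local rtdocs/ directory and LangChain's ReadTheDocsLoader:

from langchain.document_loaders import ReadTheDocsLoader

# load every mirrored HTML page under rtdocs/ as a Document
loader = ReadTheDocsLoader('rtdocs')
docs = loader.load()
len(docs)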
import hashlib
m = hashlib.md5() # this will convert URL into unique ID

url = docs[5].metadata['source'].replace('rtdocs/', 'https://')
print(url)

# convert URL to unique ID
m.update(url.encode('utf-8'))
uid = m.hexdigest()[:12]
print(uid)
from tqdm.auto import tqdm

documents = []

for doc in tqdm(docs):
    url = doc.metadata['source'].replace('rtdocs/', 'https://')
    # hash each URL with a fresh digest so the ID depends only on that URL
    uid = hashlib.md5(url.encode('utf-8')).hexdigest()[:12]
    chunks = text_splitter.split_text(doc.page_content)
    for i, chunk in enumerate(chunks):
        documents.append({
            'id': f'{uid}-{i}',
            'text': chunk,
            'source': url
        })

len(documents)
import json

with open('train.jsonl', 'w') as f:
    for doc in documents:
        f.write(json.dumps(doc) + '\n')
To load the data from file we'd write:

documents = []

with open('train.jsonl', 'r') as f:
    for line in f:
        documents.append(json.loads(line))

len(documents)
documents[0]
from langchain.agents import initialize_agent

zero_shot_agent = initialize_agent(
    agent="zero-shot-react-description",
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3
)
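This agent assumes tools and llm from the handbook's tools notebook are already defined; it can then be exercised with a one-off question (the question below is illustrative):

# run the zero-shot agent on a single query
zero_shot_agent("what is (4.5 * 2.1) ** 2.2?")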
from langchain.agents import Tool
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate(
    input_variables=["query"],
    template="{query}"
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

# initialize the LLM tool, which passes the query straight to the LLM
llm_tool = Tool(
    name='Language Model',
    func=llm_chain.run,
    description='use this tool for general purpose queries and logic'
)
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA

# chat completion llm
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name='gpt-3.5-turbo',
    temperature=0.0
)

# conversational memory
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

# retrieval qa chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
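The retriever above assumes a vectorstore was built from the indexed chunks earlier in the notebook; a minimal sketch, assuming the pre-v3 pinecone-client, OpenAI embeddings, and illustrative index and field names:

import pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# embed chunks and queries with the same OpenAI model (assumption)
embed = OpenAIEmbeddings(openai_api_key=openai_api_key)

# connect to an existing, already-populated index (name and environment are illustrative)
pinecone.init(api_key='YOUR_PINECONE_API_KEY', environment='YOUR_ENVIRONMENT')
index = pinecone.Index('langchain-retrieval-agent')

# 'text' is assumed to be the metadata field holding each chunk's text
vectorstore = Pinecone(index, embed.embed_query, 'text')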
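query itself is set in a cell not shown here; any natural-language question about the indexed documentation works, for example (illustrative):

# an illustrative question about the indexed docs
query = "How do I split documents into chunks with RecursiveCharacterTextSplitter?"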
qa.run(query)
from langchain.agents import Tool

tools = [
    Tool(
        name='Knowledge Base',
        func=qa.run,
        description=(
            'use this tool when answering general knowledge queries to get '
            'more information about the topic'
        )
    )
]
from langchain.agents import initialize_agent

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)
agent(query)
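Because the agent carries ConversationBufferWindowMemory, a follow-up turn can refer back to the previous answer (illustrative):

# follow-up that relies on the stored chat history rather than restating the topic
agent("Can you summarise that answer in one sentence?")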
The cells below follow the LangChain handbook tools notebook: https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/07-langchain-tools.ipynb
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history")

conversational_agent = initialize_agent(
    agent='conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    memory=memory,
)

result = count_tokens(
    conversational_agent,
    "Please provide me the stock prices for ABC on January the 1st"
)
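count_tokens, used above, is a small helper defined earlier in the handbook; a minimal sketch, assuming LangChain's OpenAI callback is used to report token usage:

from langchain.callbacks import get_openai_callback

def count_tokens(chain, query):
    # run the chain/agent while tracking OpenAI token usage
    with get_openai_callback() as cb:
        result = chain(query)
        print(f'Spent a total of {cb.total_tokens} tokens')
    return result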