- # from langchain.indexes import VectorstoreIndexCreator
- # from langchain.chains import RetrievalQA
- # from langchain.llms import OpenAI
- # import os
- # os.environ["OPENAI_API_KEY"] = "sk-REDACTED"  # SECURITY: leaked key removed — rotate it
- # loader = CSVLoader(file_path='Consolidated Chatgpt commentary_Nifty 50_Pilot1 - Copy.csv',encoding='utf-8')
- # index_creator = VectorstoreIndexCreator()
- # docsearch = index_creator.from_loaders([loader])
- # chain = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.7,model_name='gpt-3.5-turbo'), chain_type="stuff", retriever=docsearch.vectorstore.as_retriever(), input_key="question")
- # query = "on which date change dod was highest"
- # response = chain({"question": query})
- # print(response['result'])
- import os
- import ast
- from langchain.llms import OpenAI
- from langchain.document_loaders import TextLoader
- from langchain.document_loaders import PyPDFLoader
- from langchain.indexes import VectorstoreIndexCreator
- import streamlit as st
- from streamlit_chat import message
- import pandas as pd
- from langchain.agents import create_pandas_dataframe_agent
- from langchain.chat_models import ChatOpenAI
- from langchain.llms import OpenAI
- from langchain.agents import create_csv_agent
- from langchain.agents.agent_types import AgentType
- from fpdf import FPDF
- import spacy
- import matplotlib.pyplot as plt
- import seaborn as sns
- from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.chat_models import ChatOpenAI
- from langchain.memory import ConversationBufferMemory
- from langchain.chains import ConversationalRetrievalChain
- import openai
- import tiktoken
# SECURITY: a live OpenAI API key was previously hard-coded on these lines and
# committed to source control. That key must be treated as leaked and rotated.
# The key is now supplied externally via the OPENAI_API_KEY environment
# variable (which langchain also reads); never embed secrets in source.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
def summarize_text(chunk):
    """Summarise *chunk* via the OpenAI completions endpoint.

    Args:
        chunk: The text to summarise (sent verbatim as the prompt).

    Returns:
        The first completion's text with surrounding whitespace stripped.
    """
    completion = openai.Completion.create(
        engine='text-davinci-003',
        prompt=chunk,
        max_tokens=500,   # upper bound on the generated summary length
        temperature=0.3,  # low randomness -> more stable summaries
        n=1,              # a single completion is sufficient
    )
    return completion.choices[0].text.strip()
def text_to_chunks(text):
    """Split *text* into sentence lists, each capped at roughly 2000 words.

    Sentences are detected with spaCy's small English model. A sentence that
    pushes the running word count past 2000 starts a new chunk (the sentence
    itself goes into the new chunk).

    Args:
        text: The full text to partition.

    Returns:
        A list of chunks, where each chunk is a list of sentence strings.
    """
    nlp = spacy.load("en_core_web_sm")
    chunks = [[]]
    words_in_chunk = 0
    for sent in nlp(text).sents:
        sent_words = len(sent.text.split(" "))
        words_in_chunk += sent_words
        if words_in_chunk > 2000:
            # Current chunk is full: open a fresh one and restart the count
            # at this sentence's word total.
            chunks.append([])
            words_in_chunk = sent_words
        chunks[-1].append(sent.text)
    return chunks
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens *string* encodes to under *encoding_name*.

    Args:
        string: The text to tokenise.
        encoding_name: A tiktoken encoding name, e.g. "cl100k_base".

    Returns:
        The token count as an int.
    """
    return len(tiktoken.get_encoding(encoding_name).encode(string))
def get_text_chunks(text):
    """Break *text* into overlapping character chunks suitable for embedding.

    Args:
        text: The full text to split.

    Returns:
        A list of strings of up to 2000 characters each, with a 200-character
        overlap so context is preserved across chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
def get_vectorstore(text_chunks):
    """Embed *text_chunks* with OpenAI embeddings and index them in FAISS.

    Args:
        text_chunks: Iterable of text strings to embed.

    Returns:
        A FAISS vector store built over the chunks.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())
def get_conversation_chain(vectorstore):
    """Build a memory-backed conversational retrieval chain over *vectorstore*.

    Args:
        vectorstore: A vector store exposing ``as_retriever()``.

    Returns:
        A ConversationalRetrievalChain using gpt-3.5-turbo with a chat-history
        buffer and the "refine" chain type.
    """
    chat_llm = ChatOpenAI(temperature=0.5, model_name='gpt-3.5-turbo')
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        # chain_type= "map_reduce",
        chain_type="refine",
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
def get_python_repl_ast(data):
    """Extract the quoted tool_input value from a serialised agent step.

    Args:
        data: String representation of the agent's intermediate steps.

    Returns:
        The text between the first and second '=' following the first
        'tool_input' marker, truncated at the first comma, with all double
        quotes removed.
    """
    tail = data.split("tool_input")[1]
    value = tail.split("=")[1]
    return value.split(",")[0].replace('"', '')
model_id = "gpt-3.5-turbo"  # NOTE(review): defined but never referenced in this file — confirm before removing
- # df = pd.read_csv('CustomeCSV.csv')
- # print(df.loc[df['City'] == 'Bengaluru', 'Company'])
- # llm = OpenAI(temperature=0,model_name="gpt-3.5-turbo")
- # agent = create_pandas_dataframe_agent(
- # llm,
- # df,
- # verbose=True
- # )
- # agent.run("Give me top 100 company name having more than 5 investors. Result should be in a tabular format with three rows Company Name,no of investor,City")
- # print(df[df['No. of Investors'] > 5].head(100)[['Company', 'No. of Investors', 'City']])
- # csv_agent = create_csv_agent(ChatOpenAI(temperature=0), 'CustomeCSV.csv', verbose=True,return_intermediate_steps=True, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
- # # data=csv_agent.run("You need to provide date,commentarty,change where change has positive value. Result should be in a table format.")
- # response = csv_agent({"input":"First convert column Date into datetime format.Please fetch and consolidate the values of 'Nifty Closing', 'Commentary, from date 03/8/2023 to 5/24/2023'. Post that please summarise the value in 3 to 4 lines and mention the opening and closing price of nifty along with dates"})
- # if "df" in str(response["output"]) and len(str(response["output"]))<20:
- # print(eval(response["output"]))
- # else:
- # print(response["output"])
- # # print(response["intermediate_steps"])
- # data=str(response["intermediate_steps"])
- # df = pd.read_csv('CustomeCSV.csv')
- # try:
- # print(eval(data.split("tool_input=")[1].split(", log=")[0].replace('"','')))
- # except Exception as e:
- # print("")
# --- Flat script: build a date-bounded NIFTY summary from CustomeCSV.csv ---
# NOTE(review): the CSV is read twice (here and again below) because the first
# DataFrame's 'Date' column is consumed as the index; consider a single load.
df = pd.read_csv('CustomeCSV.csv')
# print(df)
# Reporting window (MM/DD/YYYY strings; pandas compares them against datetimes).
from_date='03/06/2023'
to_date='05/25/2023'
df['Date'] = pd.to_datetime(df['Date'])
# NOTE(review): .values yields numpy arrays, so these render with brackets
# (e.g. "[18000.]") when formatted into the summary string below — confirm
# whether a scalar was intended.
opening_price=df[df['Date']==from_date]['NIFTY Closing'].values
closing_price=df[df['Date']==to_date]['NIFTY Closing'].values
df.set_index('Date', inplace=True)
# Slice the window by the datetime index, then summarise the daily 'Change'.
filtered_df = df.loc[from_date:to_date]
min_value = filtered_df['Change'].min()
max_value = filtered_df['Change'].max()
avg_value = round(filtered_df['Change'].mean(), 2)
# Reload so 'Date' is an ordinary column again (it became the index above).
df = pd.read_csv('CustomeCSV.csv')
df['Date'] = pd.to_datetime(df['Date'])
# Headline-statistics sentence; {0}/{2} reuse the two dates in several places.
summarising="The opening price for Nifty on date {0} was {1} and closing price for Nifty on date {2} was {3}. Maximum change between date {0} and {2} was {4} and lowest change was {5} with average change value {6}".format(from_date,opening_price,to_date,closing_price,max_value,min_value,avg_value)
df=df[df['Date'].between(from_date, to_date)][['Date','Change', 'NIFTY Closing', 'NIFTY Commentary', 'Top 3 Gainers', 'Top 3 Losers']]
text_all=''
summary=""
# Concatenate one text line per trading day for downstream chunking/embedding.
for index, row in df.iterrows():
    # print(row['Date'])
    text_all=text_all+"\n\n"+str(row['Date'])+ " "+str('NIFTY Closing')+" "+str(row['Change'])+ " "+str('NIFTY Commentary')+" "+" "+str(row['Top 3 Gainers'])+ " "+" "+str(row['Top 3 Losers'])
# text_all="\n"+str(row['Date'])+ " "+str('NIFTY Closing')+" "+str(row['Change'])+ " "+str('NIFTY Commentary')+" "+" "+str(row['Top 3 Gainers'])+ " "+" "+str(row['Top 3 Losers'])
# summary=summary+"\n "+summarize_text("summarise the sentence with necessary key points in short and it should contains date closing price change in value"+str(text_all))
# print(len(text_all))
# print(text_to_chunks(text_all))
# for row in text_to_chunks(text_all):
#     print("-"*50)
#     # print(row)
#     # print(num_tokens_from_string(str(row), "cl100k_base"))
#     summary=summary+" "+summarize_text("Summarise the points in 1 to 3 lines:\n "+str(row))
#     # print(summarize_text("Summarise the points in 1 to 3 lines:\n "+str(row)))
# print(summarising)
# print(summary)
# # print(get_text_chunks(text_all))
# Chunk -> embed -> retrieval chain, then ask the LLM for a detailed summary.
text_chunks=get_text_chunks(text_all)
vectorstore=get_vectorstore(text_chunks)
response=get_conversation_chain(vectorstore)
result = response({"question": "Summarise the content in details"})
data="\n"+ str(result['answer'])
# data="\n"+ str(summary)
print(summarising + str(data))
- # import csv
- # import os
- # import openai
- # from langchain.embeddings.openai import OpenAIEmbeddings
- # from langchain.chat_models import ChatOpenAI
- # from langchain.chains import ConversationalRetrievalChain
- # from langchain.vectorstores import FAISS
- # from langchain.vectorstores.base import Document
- # from langchain.memory import ConversationBufferMemory
- # from langchain.llms import OpenAI
- # os.environ["OPENAI_API_KEY"] = "sk-REDACTED"  # SECURITY: leaked key removed — rotate it
- # def read_csv_into_vector_document(file, text_cols):
- # with open(file, newline='',encoding='utf-8') as csv_file:
- # csv_reader = csv.DictReader(csv_file)
- # text_data = []
- # for row in csv_reader:
- # text = ' '.join([row[col] for col in text_cols])
- # text_data.append(text)
- # return [Document(page_content=text) for text in text_data]
- # data = read_csv_into_vector_document("Consolidated Chatgpt commentary_Nifty 50_Pilot1 - Copy.csv", ["Date", "NIFTY Closing", "change dod","NIFTY Commentary","Top 3 Gainers","Top 3 Losers"])
- # embeddings = OpenAIEmbeddings()
- # vectors = FAISS.from_documents(data, embeddings)
- # memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
- # chain = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0.5) , vectors.as_retriever(), memory=memory)
- # result = chain({"question": "What is this data all about"})
- # print(result['answer'])
- #pip install streamlit langchain openai faiss-cpu tiktoken
- # import streamlit as st
- # from streamlit_chat import message
- # from langchain.embeddings.openai import OpenAIEmbeddings
- # from langchain.chat_models import ChatOpenAI
- # from langchain.chains import ConversationalRetrievalChain
- # from langchain.document_loaders.csv_loader import CSVLoader
- # from langchain.vectorstores import FAISS
- # import tempfile
- # import os
- # from langchain.memory import ConversationBufferMemory
- # from langchain.llms import OpenAI
- # from langchain.vectorstores import Chroma
- # os.environ["OPENAI_API_KEY"] = 'sk-REDACTED'  # SECURITY: leaked key removed — rotate it
- # # loader = CSVLoader(file_path="Consolidated Chatgpt commentary_Nifty 50_Pilot1.csv", encoding="utf-8")
- # loader = CSVLoader(file_path="CustomeCSV.csv", encoding="utf-8")
- # data = loader.load()
- # # embeddings = OpenAIEmbeddings()
- # # vectors = FAISS.from_documents(data, embeddings)
- # # memory = ConversationBufferMemory(
- # # memory_key='chat_history', return_messages=True)
- # # response = ConversationalRetrievalChain.from_llm(llm = ChatOpenAI(temperature=0,model_name='gpt-3.5-turbo', openai_api_key="sk-REDACTED"),  # SECURITY: leaked key removed — rotate it
- # # retriever=vectors.as_retriever(),memory=memory)
- # # # print(data[:500])
- # embeddings = OpenAIEmbeddings()
- # vectordb = Chroma.from_documents(data, embedding=embeddings,
- # persist_directory=".")
- # vectordb.persist()
- # memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
- # response = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0.2,model_name='gpt-3.5-turbo') , vectordb.as_retriever(), memory=memory)
- # result = response({"question": """
- # Your task is to summarise NIFTY Commentary in 100 words from date 3/8/2023 to 3/22/2023
- # """})
- # print(len(result))
- # print("----------------------------------------")
- # print(result['answer'])
[text] exl
Viewer
*** This page was generated with the "noindex, nofollow" meta tag. This happened either because you selected that option before saving, or because the system flagged the paste as spam. As a result, this page will never appear in search engines and search bots will not crawl it. There is nothing to worry about — you can still share it with anyone.
Editor
You can edit this paste and save as new: