QA
Modify Documents
The ModifyDocumentsContentChain can be used, for example, for document summarization.
// create the llm chain which is used for summarization
LargeLanguageModelChain llmChain = new OpenAiChatCompletionsChain(
PromptTemplates.QA_SUMMARIZE,
new OpenAiChatCompletionsParameters().temperature(0D).model("gpt-3.5-turbo"),
System.getenv("OPENAI_API_KEY"));
// create the ModifyDocumentsContentChain which is used to apply the llm chain to each passed document
ModifyDocumentsContentChain summarizeDocumentsChain = new ModifyDocumentsContentChain(llmChain);
// create some example documents (diamond operator: the type arguments are inferred)
Map<String, String> myFirstDocument = new HashMap<>();
myFirstDocument.put(PromptConstants.CONTENT, "this is my first document content");
myFirstDocument.put(PromptConstants.SOURCE, "this is my first document source");
// the default summarize prompt PromptTemplates.QA_SUMMARIZE also expects the question used for retrieval in the document
myFirstDocument.put(PromptConstants.QUESTION, "who is John Doe?");
Map<String, String> mySecondDocument = new HashMap<>();
mySecondDocument.put(PromptConstants.CONTENT, "this is my second document content");
mySecondDocument.put(PromptConstants.SOURCE, "this is my second document source");
mySecondDocument.put(PromptConstants.QUESTION, "how old is John Doe?"); // see comment above
// input for the summarize chain is a stream of documents
Stream<Map<String, String>> documents = Stream.of(myFirstDocument, mySecondDocument);
// output contains the passed documents with summarized content-Value
Stream<Map<String, String>> summarizedDocuments = summarizeDocumentsChain.run(documents);
Combine Documents
// create the chain that concatenates the content of all passed documents into one
CombineDocumentsChain combineDocumentsChain = new CombineDocumentsChain();
// create some example documents (diamond operator: the type arguments are inferred)
Map<String, String> myFirstDocument = new HashMap<>();
myFirstDocument.put(PromptConstants.CONTENT, "this is my first document content");
myFirstDocument.put(PromptConstants.SOURCE, "this is my first document source");
Map<String, String> mySecondDocument = new HashMap<>();
mySecondDocument.put(PromptConstants.CONTENT, "this is my second document content");
mySecondDocument.put(PromptConstants.SOURCE, "this is my second document source");
// input for the combine chain is a stream of documents
Stream<Map<String, String>> documents = Stream.of(myFirstDocument, mySecondDocument);
// output is a single document combining all input documents
Map<String, String> combinedDocument = combineDocumentsChain.run(documents);
/*
 * Content: this is my first document content
 * Source: this is my first document source
 *
 * Content: this is my second document content
 * Source: this is my second document source
 *
 * (stored with key "content" inside the map)
 */
Map LLM results to answers with sources
// parse a raw LLM completion into an answer plus its list of sources
MapAnswerWithSourcesChain mapAnswerWithSourcesChain = new MapAnswerWithSourcesChain();
String llmOutput = "The answer is bla bla bla.\nSOURCES: page 1 book xy, page 2 book ab";
AnswerWithSources answerWithSources = mapAnswerWithSourcesChain.run(llmOutput);
System.out.println(answerWithSources.getAnswer()); // The answer is bla bla bla.
System.out.println(answerWithSources.getSources()); // [page 1 book xy, page 2 book ab]
Split Documents
// 1. Create Documents
// ArrayList is the idiomatic default List implementation for sequential access;
// the variable is typed against the List interface so callers are unaffected
List<Map<String, String>> documents = new ArrayList<>();
// LinkedHashMap keeps insertion order so the example output below is deterministic
Map<String, String> firstDocument = new LinkedHashMap<>();
firstDocument.put(PromptConstants.SOURCE, "book of john");
firstDocument.put(PromptConstants.CONTENT, "This is a short text. This is another short text.");
documents.add(firstDocument);
Map<String, String> secondDocument = new LinkedHashMap<>();
secondDocument.put(PromptConstants.SOURCE, "book of jane");
secondDocument.put(PromptConstants.CONTENT, "This is a short text.");
documents.add(secondDocument);
// 2. Split Documents
/*
 * We create a TextSplitter that splits a text into partitions using a JTokkit
 * Encoding. We use the cl100k_base encoding (which btw is the default for
 * gpt-3.5-turbo); the second argument is the maximum partition size in tokens.
 */
TextSplitter textSplitter = new JtokkitTextSplitter(
    Encodings.newDefaultEncodingRegistry().getEncoding(EncodingType.CL100K_BASE), 10);
/*
 * we now instantiate the SplitDocumentsChain which will split our documents
 * using the above created TextSplitter on the "content" field.
 */
SplitDocumentsChain splitDocumentsChain = new SplitDocumentsChain(textSplitter);
List<Map<String, String>> splitDocuments = splitDocumentsChain.run(documents.stream())
    .collect(Collectors.toList());
// splitDocuments: [
// {content=This is a short text. , source=book of john},
// {content=This is another short text., source=book of john},
// {content=This is a short text., source=book of jane}
// ]