QA
Modify Documents
The ModifyDocumentsContentChain can be used for document summarization (for example).
// create the llm chain which is used for summarization
LargeLanguageModelChain llmChain = new OpenAiChatCompletionsChain(
        PromptTemplates.QA_SUMMARIZE, 
        new OpenAiChatCompletionsParameters().temperature(0D).model("gpt-3.5-turbo"),
        System.getenv("OPENAI_API_KEY"));
// create the ModifyDocumentsContentChain which is used to apply the llm chain to each passed document
ModifyDocumentsContentChain summarizeDocumentsChain = new ModifyDocumentsContentChain(llmChain);
// create some example documents
Map<String, String> myFirstDocument = new HashMap<String, String>();
myFirstDocument.put(PromptConstants.CONTENT, "this is my first document content");
myFirstDocument.put(PromptConstants.SOURCE, "this is my first document source");
// the default summarize prompt PromptTemplates.QA_SUMMARIZE also expects the question used for retrieval in the document
myFirstDocument.put(PromptConstants.QUESTION, "who is John Doe?");
Map<String, String> mySecondDocument = new HashMap<String, String>();
mySecondDocument.put(PromptConstants.CONTENT, "this is my second document content");
mySecondDocument.put(PromptConstants.SOURCE, "this is my second document source");
mySecondDocument.put(PromptConstants.QUESTION, "how old is John Doe?"); // see comment above
// input for the summarize chain is a stream of documents
Stream<Map<String, String>> documents = Stream.of(myFirstDocument, mySecondDocument);
// output contains the passed documents with summarized content-Value
Stream<Map<String, String>> summarizedDocuments = summarizeDocumentsChain.run(documents);
Combine Documents
CombineDocumentsChain combineDocumentsChain = new CombineDocumentsChain();
Map<String, String> myFirstDocument = new HashMap<String, String>();
myFirstDocument.put(PromptConstants.CONTENT, "this is my first document content");
myFirstDocument.put(PromptConstants.SOURCE, "this is my first document source");
Map<String, String> mySecondDocument = new HashMap<String, String>();
mySecondDocument.put(PromptConstants.CONTENT, "this is my second document content");
mySecondDocument.put(PromptConstants.SOURCE, "this is my second document source");
Stream<Map<String, String>> documents = Stream.of(myFirstDocument, mySecondDocument);
Map<String, String> combinedDocument = combineDocumentsChain.run(documents);
/* 
 * Content: this is my first document content
 * Source: this is my first document source
 *
 * Content: this is my second document content
 * Source: this is my second document source
 * 
 * (stored with key "content" inside the map)
 */
Map LLM results to answers with sources
MapAnswerWithSourcesChain mapAnswerWithSourcesChain = new MapAnswerWithSourcesChain();
AnswerWithSources answerWithSources = mapAnswerWithSourcesChain.run("The answer is bla bla bla.\nSOURCES: page 1 book xy, page 2 book ab");
System.out.println(answerWithSources.getAnswer());  // The answer is bla bla bla.
System.out.println(answerWithSources.getSources()); // [page 1 book xy, page 2 book ab]
Split Documents
// 1. Create Documents
List<Map<String, String>> documents = new LinkedList<>();
Map<String, String> firstDocument = new LinkedHashMap<>();
firstDocument.put(PromptConstants.SOURCE, "book of john");
firstDocument.put(PromptConstants.CONTENT, "This is a short text. This is another short text.");
documents.add(firstDocument);
Map<String, String> secondDocument = new LinkedHashMap<>();
secondDocument.put(PromptConstants.SOURCE, "book of jane");
secondDocument.put(PromptConstants.CONTENT, "This is a short text.");
documents.add(secondDocument);
// 2. Split Documents
/*
 * We create a TextSplitter that splits a text into partitions using a JTokkit
 * Encoding. We use the cl100k_base encoding (which btw is the default for
 * gpt-3.5-turbo)
 */
TextSplitter textSplitter = new JtokkitTextSplitter(
        Encodings.newDefaultEncodingRegistry().getEncoding(EncodingType.CL100K_BASE), 10);
/*
 * we now instantiate the SplitDocumentsChain which will split our documents
 * using the above created TextSplitter on the "content" field.
 */
SplitDocumentsChain splitDocumentsChain = new SplitDocumentsChain(textSplitter);
List<Map<String, String>> splitDocuments = splitDocumentsChain.run(documents.stream())
        .collect(Collectors.toList());
// splitDocuments: [
//   {content=This is a short text. , source=book of john},
//   {content=This is another short text., source=book of john},
//   {content=This is a short text., source=book of jane}
// ]