Skip to content

AskCode API Reference

askcode.main

This file contains the definition of the AskCode main class.

AskCode

AskCode(
    codebase_path,
    language,
    parser_threshold,
    text_splitter_chunk_size,
    text_splitter_chunk_overlap,
    use_HF,
    llm_model,
    embeddings_model,
    retriever_search_type,
    retriever_k,
    max_new_tokens,
    temperature,
    top_p,
    repetition_penalty,
    use_autogptq,
)
Source code in askcode/main.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def __init__(self,
             codebase_path: str,
             language: str,
             parser_threshold: int,
             text_splitter_chunk_size: int,
             text_splitter_chunk_overlap: int,
             use_HF: bool,
             llm_model: str,
             embeddings_model: str,
             retriever_search_type: str,
             retriever_k: int,
             max_new_tokens: int,
             temperature: float,
             top_p: float,
             repetition_penalty: float,
             use_autogptq: bool,
             ):
    """
    Stores the configuration on the instance; nothing is loaded here

    :param codebase_path: path to the codebase (kept as a `Path`)
    :param language: language handed to the parser and the text splitter
    :param parser_threshold: threshold handed to the language parser
    :param text_splitter_chunk_size: chunk size handed to the text splitter
    :param text_splitter_chunk_overlap: chunk overlap handed to the text splitter
    :param use_HF: selects the Hugging Face code paths when true, OpenAI otherwise
    :param llm_model: name of the large language model
    :param embeddings_model: name of the Hugging Face embeddings model
    :param retriever_search_type: search type handed to the retriever
    :param retriever_k: value of `k` handed to the retriever
    :param max_new_tokens: generation setting for the Hugging Face pipeline
    :param temperature: generation setting for the Hugging Face pipeline
    :param top_p: generation setting for the Hugging Face pipeline
    :param repetition_penalty: generation setting for the Hugging Face pipeline
    :param use_autogptq: load the Hugging Face model through AutoGPTQ when true
    """
    # --- where the code lives and how it is parsed / split
    self.codebase_path = Path(codebase_path)
    self.language = language
    self.parser_threshold = parser_threshold
    self.text_splitter_chunk_size = text_splitter_chunk_size
    self.text_splitter_chunk_overlap = text_splitter_chunk_overlap

    # --- which backend and which models
    self.use_HF = use_HF
    self.use_autogptq = use_autogptq
    self.llm_model = llm_model
    self.embeddings_model = embeddings_model

    # --- retrieval settings
    self.retriever_search_type = retriever_search_type
    self.retriever_k = retriever_k

    # --- text generation settings
    self.max_new_tokens = max_new_tokens
    self.temperature = temperature
    self.top_p = top_p
    self.repetition_penalty = repetition_penalty

setup

setup()

Sets up the necessary components for the LangChain chain.

Returns:

Type Description
None

None

Source code in askcode/main.py
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def setup(self) -> None:
    """
    Builds the retriever, the LLM and the prompt template and stores them on the instance

    :return: None
    """
    # Each loading step runs inside a console status context and logs once it is done.
    with console.status("[bold green]Loading files ..."):
        self.retriever = self.load_retriever()
        console.log("[bold green]Files loaded successfully")

    with console.status("[bold green]Loading LLM ..."):
        self.llm = self.load_llm()
        console.log("[bold green]LLM loaded successfully")

    # The prompt template is built last, outside of any status context.
    self.prompt_template = self.get_prompt_template()

load_retriever

load_retriever()

Loads the files from the codebase and sets up the retriever

Source code in askcode/main.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def load_retriever(self):
    """
    Loads the files from the codebase and sets up the retriever

    Exclude patterns are read from the `.gitignore` at the root of the codebase
    when that file exists and can be read; otherwise nothing is excluded.
    The loaded files are split into chunks, embedded (Hugging Face embeddings
    when `self.use_HF` is set, OpenAI embeddings otherwise) and indexed with
    `Chroma.from_documents`.

    :return: the retriever created by `as_retriever` on the Chroma index
    """
    try:
        with open(self.codebase_path / '.gitignore', encoding="utf-8") as f:
            # Lines read from a file keep their trailing newline, which would
            # otherwise end up inside every pattern. Blank lines and `#`
            # comment lines are not patterns at all, so they are dropped.
            exclude = [
                pattern
                for pattern in (line.strip() for line in f)
                if pattern and not pattern.startswith("#")
            ]
    except (OSError, UnicodeDecodeError):
        # no gitignore found (or it cannot be read): nothing to exclude
        exclude = []

    # NOTE(review): the patterns are presumably matched as plain globs by the
    # loader, so gitignore-only syntax (negation, directory rules) may not be
    # honoured -- confirm against the loader documentation.
    loader = GenericLoader.from_filesystem(
        self.codebase_path,
        glob="**/[!.]*",  # presumably skips names starting with a dot -- confirm
        exclude=exclude,
        suffixes=[".py", ".js"],  # only python and javascript atm
        show_progress=True,
        parser=LanguageParser(language=self.language, parser_threshold=self.parser_threshold)
    )
    files = loader.load()

    splitter = RecursiveCharacterTextSplitter.from_language(
        language=self.language,
        chunk_size=self.text_splitter_chunk_size,
        chunk_overlap=self.text_splitter_chunk_overlap,
    )
    docs = splitter.split_documents(files)

    if self.use_HF:
        db = Chroma.from_documents(docs, HuggingFaceEmbeddings(model_name=self.embeddings_model))
    else:
        # defaults to OpenAI; imported here so it is only needed on this path
        from langchain.embeddings import OpenAIEmbeddings
        db = Chroma.from_documents(docs, OpenAIEmbeddings(disallowed_special=()))

    return db.as_retriever(
        search_type=self.retriever_search_type,
        search_kwargs={"k": self.retriever_k},
    )

load_llm

load_llm()

Sets up the LLM

Source code in askcode/main.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def load_llm(self):
    """
    Builds the language model selected by the configuration

    :return: a `ChatOpenAI` instance when `self.use_HF` is false, otherwise a
             `HuggingFacePipeline` wrapping a "text-generation" pipeline
    """
    if not self.use_HF:
        # defaults to OpenAI
        from langchain.chat_models import ChatOpenAI
        return ChatOpenAI(model_name=self.llm_model)

    tokenizer = AutoTokenizer.from_pretrained(self.llm_model, use_fast=True)

    if self.use_autogptq:
        # imported only when the AutoGPTQ loading path is requested
        from auto_gptq import AutoGPTQForCausalLM
        model = AutoGPTQForCausalLM.from_quantized(self.llm_model, use_safetensors=True)
    else:
        model = AutoModelForCausalLM.from_pretrained(self.llm_model)

    # Generation settings come straight from the instance configuration.
    generation_settings = {
        "max_new_tokens": self.max_new_tokens,
        "temperature": self.temperature,
        "top_p": self.top_p,
        "repetition_penalty": self.repetition_penalty,
    }
    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=text_generator)

get_prompt_template

get_prompt_template()

Sets up the prompt template

Source code in askcode/main.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def get_prompt_template(self):
    """
    Builds the question-answering prompt

    :return: a `PromptTemplate` with the `context` and `question` input variables
    """
    # The text below is sent to the model as-is, whitespace included.
    qa_prompt_text = """Use the following pieces of context to answer the question at the end. 
       If you don't know the answer, just say that you don't know, don't try to make up an answer. 
       Use three sentences maximum and keep the answer as concise as possible. 
       {context}
       Question: {question}
       Helpful Answer:"""

    return PromptTemplate(
        input_variables=["context", "question"],
        template=qa_prompt_text,
    )

chain

chain(retriever, llm, prompt_template, question)

Runs a question through the LangChain chain.

Parameters:

Name Type Description Default
retriever

the docs Retriever

required
llm

the large language model

required
prompt_template

the prompt template

required
question str

the question

required

Returns:

Type Description

chain results

Source code in askcode/main.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def chain(self, retriever, llm, prompt_template, question: str):
    """
    Answers a question with a question-answering chain over the retrieved documents

    :param retriever: the docs Retriever, queried with the question
    :param llm: the large language model given to the chain
    :param prompt_template: the prompt template given to the chain
    :param question: the question

    :return: chain results
    """
    # Fetch the documents first, then build the chain that will consume them.
    context_docs = retriever.get_relevant_documents(question)
    qa_chain = load_qa_chain(llm, prompt=prompt_template)

    chain_inputs = {"input_documents": context_docs, "question": question}
    return qa_chain(chain_inputs, return_only_outputs=False)

ask

ask(question)

Ask a question to the codebase. You need to call self.setup before calling this function.

Parameters:

Name Type Description Default
question str

the question :)

required

Returns:

Type Description

chain results

Source code in askcode/main.py
201
202
203
204
205
206
207
208
209
def ask(self, question: str):
    """
    Runs a question against the codebase
    `self.setup` must have been called first, since it creates the
    retriever, the LLM and the prompt template used here

    :param question: the question :)
    :return: chain results
    """
    # Components stored on the instance, in the order `self.chain` takes them.
    components = (self.retriever, self.llm, self.prompt_template)
    return self.chain(*components, question)

askcode.cli

Command Line Interface

main

main(
    codebase_path=".",
    language="python",
    parser_threshold=0,
    text_splitter_chunk_size=256,
    text_splitter_chunk_overlap=50,
    use_HF=True,
    llm_model="TheBloke/CodeLlama-7B-GPTQ",
    embeddings_model="sentence-transformers/all-MiniLM-L12-v2",
    retriever_search_type="mmr",
    retriever_k=4,
    max_new_tokens=50,
    temperature=0.1,
    top_p=0.9,
    repetition_penalty=1.0,
    use_autogptq=True,
)

Chat with your code base with the power of LLMs.

Parameters:

Name Type Description Default
codebase_path str

path to your codebase

'.'
language str

programming language ['python', 'javascript'] at the moment

'python'
parser_threshold int

minimum lines needed to activate parsing (0 by default).

0
text_splitter_chunk_size int

Maximum size of chunks to return

256
text_splitter_chunk_overlap int

Overlap in characters between chunks

50
use_HF bool

use hugging face models, if False OpenAI models will be used

True
llm_model str

Large language model name (HF model name or OpenAI model)

'TheBloke/CodeLlama-7B-GPTQ'
embeddings_model str

Embeddings model (HF model name or OpenAI model)

'sentence-transformers/all-MiniLM-L12-v2'
retriever_search_type str

Defines the type of search that the Retriever should perform. Can be "similarity" (default), "mmr", or "similarity_score_threshold".

'mmr'
retriever_k int

Amount of documents to return (Default: 4)

4
max_new_tokens int

Maximum tokens to generate

50
temperature float

sampling temperature

0.1
top_p float

sampling top_p

0.9
repetition_penalty float

sampling repetition_penalty

1.0
use_autogptq bool

Set it to True to use Quantized AutoGPTQ models

True

Returns:

Type Description

None

Source code in askcode/cli.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def main(
    codebase_path: str = '.',
    language: str = 'python',
    parser_threshold: int = 0,
    text_splitter_chunk_size: int = 256,
    text_splitter_chunk_overlap: int = 50,
    use_HF: bool = True,
    llm_model: str = "TheBloke/CodeLlama-7B-GPTQ",
    embeddings_model: str = "sentence-transformers/all-MiniLM-L12-v2",
    retriever_search_type: str = "mmr",
    retriever_k: int = 4,
    max_new_tokens: int = 50,
    temperature: float = 0.1,
    top_p: float = 0.9,
    repetition_penalty: float = 1.,
    use_autogptq: bool = True):
    """
    Chat with your code base with the power of LLMs.

    :param codebase_path: path to your codebase
    :param language: programming language ['python', 'javascript'] at the moment
    :param parser_threshold: minimum lines needed to activate parsing (0 by default).
    :param text_splitter_chunk_size: Maximum size of chunks to return
    :param text_splitter_chunk_overlap: Overlap in characters between chunks
    :param use_HF: use hugging face models, if False OpenAI models will be used
    :param llm_model: Large language model name (HF model name or OpenAI model)
    :param embeddings_model: Embeddings model (HF model name or OpenAI model)
    :param retriever_search_type: Defines the type of search that
                the Retriever should perform.
                Can be "similarity" (default), "mmr", or
                "similarity_score_threshold".
    :param retriever_k: Amount of documents to return (Default: 4)
    :param max_new_tokens: Maximum tokens to generate
    :param temperature: sampling temperature
    :param top_p: sampling top_p
    :param repetition_penalty: sampling repetition_penalty
    :param use_autogptq: Set it to True to use Quantized AutoGPTQ models

    :return: None
    """

    # Pass every option by keyword so a reordering of the constructor's
    # parameters cannot silently mix up the configuration values.
    ask_code = AskCode(
        codebase_path=codebase_path,
        language=language,
        parser_threshold=parser_threshold,
        text_splitter_chunk_size=text_splitter_chunk_size,
        text_splitter_chunk_overlap=text_splitter_chunk_overlap,
        use_HF=use_HF,
        llm_model=llm_model,
        embeddings_model=embeddings_model,
        retriever_search_type=retriever_search_type,
        retriever_k=retriever_k,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        use_autogptq=use_autogptq,
    )
    console.print(__header__, style="blue")
    ask_code.setup()
    console.print("CTRL+C To stop ...", style="bold red")
    print()

    # Question/answer loop: runs until the user interrupts it or input ends.
    while True:
        try:
            q = console.input("[yellow][-] How can I help you: ")
            with console.status("[bold green]Searching ..."):
                res = ask_code.ask(q)
                # presumably cuts the answer where the model starts a new
                # "Question" section -- confirm against enforce_stop_tokens
                ans = enforce_stop_tokens(res['output_text'], ["Question"])
                console.print(f"[+] Answer: {ans}", style="bold green")
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C, or end of input (Ctrl+D / closed stdin), which would
            # otherwise end the program with a traceback.
            break