# hex2context
```elixir
# Notebook setup: install every dependency used by the cells below
# (SQLite + vector extension via Ecto, Bumblebee/Nx/EXLA for embeddings,
# TextChunker for splitting docs, Req for HTTP, Plug/Kino for the HTTP API).
Mix.install([
{:sqlite_vec, github: "joelpaulkoch/sqlite_vec"},
{:ecto, "~> 3.12"},
{:ecto_sql, "~> 3.12"},
{:ecto_sqlite3, "~> 0.17.2"},
{:kino, "~> 0.14.1"},
{:nx, "~> 0.9.1"},
{:bumblebee, "~> 0.6.0"},
{:exla, "~> 0.9.0"},
{:axon, "~> 0.7.0"},
{:text_chunker, "~> 0.3.1"},
{:req, "~> 0.5.8"},
{:plug, "~> 1.16"}
])
# Make EXLA the default backend for all Nx tensors created in this notebook.
Nx.global_default_backend(EXLA.Backend)
```
## Documentation
_`hex2context` is a [Livebook](https://livebook.dev) companion to `hex2txt`._
### Introduction
`hex2context` uses Retrieval-Augmented Generation (RAG) to include only the most relevant snippets of documentation from the `llms.txt` files generated by [`hex2txt`](https://hex2txt.fly.dev).
(Excluding documentation that is not relevant to the task at hand makes more efficient use of limited LLM context windows and reduces inference costs.)
### Usage
#### Step 1: Generate Embeddings
First, generate embeddings for every combination of package/version that you will be interested in querying later. For example:
<!-- livebook:{"force_markdown":true} -->
```elixir
Hex2context.ingest_docs([
{"phoenix", "1.7.18"},
{"phoenix_html", "4.1.1"},
{"phoenix_live_view", "1.0.1"},
{"flop", "0.26.1"},
{"flop_phoenix", "0.23.1"},
{"ecto", "3.12.5"}
])
```
Embeddings are generated locally (i.e., on the computer running this Livebook) and durably written to a SQLite database in the same folder on disk that this notebook is running from. For larger packages (like many in the example above), generating these embeddings may take some time. However, once persisted, they do not need to be re-computed.
#### Step 2: Query for Relevant Documentation Snippets
This notebook uses [`Kino.Proxy`](https://hexdocs.pm/kino/Kino.Proxy.html) to expose the following HTTP API:
* `GET /proxy/sessions/:id/:package_name/:package_version?query=:query`
Here:
* `:id` is the identifier of the current Livebook session (check your browser's address bar for the identifier of your current session; [more info](https://news.livebook.dev/livebook-0.13-expose-an-http-api-from-your-notebook-2wE6GY))
* `:package_name` is the name of the package to query documentation for
* `:package_version` is the version number of the package to query documentation for
* `:query` is the (URL-encoded) query used for similarity search
Documentation relevant to the provided `:query` (for the indicated package) will be returned from this endpoint, and can be fed directly into your AI coding assistant of choice.
Note that this endpoint will return an error if embeddings for the indicated package (name _and_ version) have not been ingested yet.
For example:
```
curl "http://localhost:52039/proxy/sessions/6lq7fwziy23shg77c7vjzjtqnoy4hlpfkzhbsqsiuel7vaqr/phoenix_live_view/1.0.1/?query=phx-click"
```
### Thanks
With thanks to the following resources:
* https://github.com/dwyl/rag-elixir-doc/
* https://github.com/dwyl/rag-elixir-doc/blob/main/rag-elixir.livemd
* https://bitcrowd.dev/a-rag-for-elixir-in-elixir/
* https://gist.github.com/joelpaulkoch/9192abd23bd2e6ff76be314c24173974
## Infrastructure (DB)
```elixir
# Ecto repository for this notebook, backed by the SQLite3 adapter.
# It is not part of a Mix application, so the process has to be started
# explicitly with its connection options (database path, extensions).
defmodule Hex2context.Repo do
use Ecto.Repo,
otp_app: :hex2context,
adapter: Ecto.Adapters.SQLite3
end
```
```elixir
# Start the repo under Kino's supervision. The database file lives next to
# this notebook (`__DIR__`), and the sqlite-vec extension is loaded so the
# vector functions used by the similarity query are available.
Kino.start_child(
{Hex2context.Repo,
database: Path.join(__DIR__, "hex2context.db"), load_extensions: [SqliteVec.path()]}
)
```
```elixir
defmodule Hex2context.Repo.Migrations.CreateEmbeddingsTable do
  @moduledoc """
  Creates the `embeddings` table that stores one row per documentation chunk.

  Each row holds the package name/version the chunk belongs to, the chunk text
  and its embedding vector (declared as `FLOAT[384]`). A chunk is unique per
  package name and version.
  """

  use Ecto.Migration

  # Each SQL statement is issued through its own `execute/1` call: SQLite's
  # prepare API only compiles the first statement of a multi-statement string,
  # so bundling `CREATE TABLE` and `CREATE INDEX` together risks the index
  # never being created.
  def up do
    execute("""
    CREATE TABLE embeddings(
      id INTEGER PRIMARY KEY,
      package_name TEXT NOT NULL,
      package_version TEXT NOT NULL,
      doc_chunk TEXT NOT NULL,
      embedding FLOAT[384] NOT NULL,
      UNIQUE(package_name, package_version, doc_chunk)
    )
    """)

    execute("CREATE INDEX idx_package ON embeddings(package_name, package_version)")
  end

  # Dropping the table also removes the indexes that belong to it.
  def down do
    execute("DROP TABLE embeddings")
  end
end
```
```elixir
defmodule Hex2context.Repo.Migrations do
  @moduledoc """
  Entry points for applying or reverting the notebook's single migration
  against `Hex2context.Repo`.
  """

  alias Hex2context.Repo
  alias Hex2context.Repo.Migrations.CreateEmbeddingsTable

  # Version number under which the migration is recorded.
  @version 1

  # Runs `CreateEmbeddingsTable` in the "up" direction.
  def migrate do
    Ecto.Migrator.up(Repo, @version, CreateEmbeddingsTable)
  end

  # Runs `CreateEmbeddingsTable` in the "down" direction.
  def rollback do
    Ecto.Migrator.down(Repo, @version, CreateEmbeddingsTable)
  end
end
```
```elixir
# Apply the embeddings-table migration (version 1) to the notebook's database.
Hex2context.Repo.Migrations.migrate()
```
## Infrastructure (ML)
```elixir
defmodule Hex2context.Serving do
  @moduledoc """
  Builds the `Nx.Serving` used to turn text into embedding vectors.
  """

  # Hugging Face repository that provides both the model and its tokenizer.
  @embedding_model {:hf, "sentence-transformers/all-MiniLM-L6-v2"}

  @doc """
  Loads the embedding model and tokenizer and returns a text-embedding serving.

  The serving mean-pools the hidden state, L2-normalises the result and is
  compiled with EXLA for a batch size of 1. Raises a `MatchError` if either the
  model or the tokenizer cannot be loaded.
  """
  def build_serving_for_embeddings do
    {:ok, model_info} = Bumblebee.load_model(@embedding_model)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@embedding_model)

    # NOTE(review): `sequence_length` is presumably a token count rather than a
    # character count; confirm 2000 is intended for this model.
    serving_opts = [
      output_pool: :mean_pooling,
      output_attribute: :hidden_state,
      embedding_processor: :l2_norm,
      compile: [batch_size: 1, sequence_length: [2000]],
      defn_options: [compiler: EXLA]
    ]

    Bumblebee.Text.text_embedding(model_info, tokenizer, serving_opts)
  end
end
```
```elixir
# Start the embedding serving as a named process so other cells can reach it
# through `Nx.Serving.batched_run(Hex2context.EmbeddingServing, text)`.
Kino.start_child(
{Nx.Serving,
serving: Hex2context.Serving.build_serving_for_embeddings(),
name: Hex2context.EmbeddingServing,
batch_timeout: 100}
)
```
## Schema
```elixir
defmodule Hex2context.Embedding do
  @moduledoc """
  Ecto schema for a row of the `embeddings` table: one documentation chunk of
  a given package version together with its embedding vector.
  """

  use Ecto.Schema

  schema "embeddings" do
    # Vector stored through the sqlite-vec Float32 Ecto type.
    field :embedding, SqliteVec.Ecto.Float32
    # Package the chunk was taken from, and the exact version of that package.
    field :package_name, :string
    field :package_version, :string
    # Text of the documentation chunk itself.
    field :doc_chunk, :string
  end
end
```
## Application (RAG)
```elixir
defmodule Hex2context do
  @moduledoc """
  Retrieval side of the notebook: ingests `llms.txt` documentation from
  hex2txt, stores one embedding per documentation chunk, and answers
  similarity queries against the stored chunks.
  """

  require Logger
  import Ecto.Query
  import SqliteVec.Ecto.Query
  alias Hex2context.Embedding
  alias Hex2context.Repo

  @hex2txt "https://hex2txt.fly.dev"
  @req Req.new()
  @chunk_retrieval_limit 20

  @doc """
  Ingests documentation for every `{package_name, package_version}` tuple in
  `packages_list`, skipping packages that were already ingested.

  Always returns `:ok`.
  """
  @spec ingest_docs([{String.t(), String.t()}]) :: :ok
  def ingest_docs(packages_list) when is_list(packages_list) do
    for {package_name, package_version} <- packages_list do
      ingest_docs(package_name, package_version)
    end

    :ok
  end

  @doc """
  Ingests documentation for a single package version.

  Nothing is fetched when embeddings for the package version already exist,
  unless `force: true` is given; in that case the stored chunks are replaced
  by a fresh ingest.

  Returns `:ok`. Raises if the documentation cannot be downloaded.
  """
  @spec ingest_docs(String.t(), String.t(), keyword()) :: :ok
  def ingest_docs(package_name, package_version, opts \\ []) do
    force_refresh? = Keyword.get(opts, :force, false)

    # The existence check is only needed when the caller did not ask for a
    # forced refresh, so the short-circuit saves a query in that case.
    if force_refresh? == false and has_docs?(package_name, package_version) do
      Logger.info("Docs already ingested for #{package_name} v#{package_version}; specify `force: true` to refresh")
    else
      package_name
      |> fetch_docs_and_generate_embeddings(package_version)
      |> persist_embeddings(package_name, package_version)
    end

    :ok
  end

  @doc """
  Returns the stored documentation chunks closest to `query` for the given
  package version, ordered by L2 distance and limited to
  #{@chunk_retrieval_limit} entries.

  Returns `{:ok, chunks}` on success, or `{:err, message}` when nothing has
  been ingested for the package version. The `:err` tag is kept as-is because
  callers match on it.
  """
  @spec retrieve_docs(String.t(), String.t(), String.t()) ::
          {:ok, [String.t()]} | {:err, String.t()}
  def retrieve_docs(package_name, package_version, query) do
    if has_docs?(package_name, package_version) do
      %{embedding: tensor} = Nx.Serving.batched_run(Hex2context.EmbeddingServing, query)
      query_embedding = SqliteVec.Float32.new(tensor)

      doc_chunks =
        Repo.all(
          from(i in Embedding,
            where: i.package_name == ^package_name,
            where: i.package_version == ^package_version,
            order_by: vec_distance_L2(i.embedding, vec_f32(query_embedding)),
            limit: ^@chunk_retrieval_limit,
            select: i.doc_chunk
          )
        )

      {:ok, doc_chunks}
    else
      {:err,
       "No docs for #{package_name} v#{package_version}; must pre-generate with `Hex2context.ingest_docs/2`"}
    end
  end

  # True when at least one chunk is stored for the package version.
  defp has_docs?(package_name, package_version) do
    Repo.exists?(
      from(i in Embedding,
        where: i.package_name == ^package_name,
        where: i.package_version == ^package_version
      )
    )
  end

  # Downloads the package's `llms.txt`, splits it into trimmed markdown chunks
  # and computes one embedding per chunk. Returns `{chunks, embeddings}` with
  # both lists in the same order.
  defp fetch_docs_and_generate_embeddings(package_name, package_version) do
    url = @hex2txt <> "/" <> package_name <> "/" <> package_version <> "/llms.txt"
    docs = Req.get!(@req, url: url, http_errors: :raise).body

    chunks =
      docs
      |> TextChunker.split(format: :markdown)
      |> Enum.map(fn chunk ->
        %TextChunker.Chunk{chunk | text: String.trim(chunk.text)}
      end)

    chunk_count = length(chunks)

    Logger.info(
      "Fetched #{chunk_count} documentation chunks for #{package_name} v#{package_version}"
    )

    embeddings =
      chunks
      # Start counting at 1 so the progress log reads 1/N .. N/N.
      |> Enum.with_index(1)
      |> Enum.map(fn {%TextChunker.Chunk{text: text}, position} ->
        Logger.info("Computing embedding for chunk #{position}/#{chunk_count}...")
        Nx.Serving.batched_run(Hex2context.EmbeddingServing, text)
      end)

    Logger.info("Finished computing embeddings for #{chunk_count} chunks")
    {chunks, embeddings}
  end

  # Stores the freshly computed chunks for the package version.
  #
  # Rows from an earlier ingest are deleted first, so a forced refresh cannot
  # leave behind chunks that no longer exist in the upstream documentation.
  # Delete and inserts share one transaction: if anything raises, the
  # previously stored chunks are kept.
  defp persist_embeddings({chunks, embeddings}, package_name, package_version) do
    Repo.transaction(
      fn ->
        Repo.delete_all(
          from(i in Embedding,
            where: i.package_name == ^package_name,
            where: i.package_version == ^package_version
          )
        )

        for {%TextChunker.Chunk{text: text}, %{embedding: tensor}} <-
              Enum.zip(chunks, embeddings) do
          Repo.insert(
            %Embedding{
              embedding: SqliteVec.Float32.new(tensor),
              package_name: package_name,
              package_version: package_version,
              doc_chunk: text
            },
            # Identical chunk text can occur more than once within one
            # document; the upsert keeps the unique constraint from failing.
            on_conflict: :replace_all
          )
        end
      end,
      # Large packages produce many rows; do not cap the transaction duration.
      timeout: :infinity
    )
  end
end
```
## HTTP API Server
```elixir
defmodule Hex2context.API do
  @moduledoc """
  Plug router exposing documentation retrieval over HTTP.

  `GET /:package_name/:package_version?query=...` responds with the matching
  documentation chunks separated by blank lines.
  """

  use Plug.Router

  plug(:match)
  plug(Plug.Parsers, parsers: [:urlencoded])
  plug(:dispatch)

  get "/:package_name/:package_version" do
    # A missing `query` parameter is treated as an empty search string.
    search_text = conn.params["query"] || ""

    {status, body} =
      package_name
      |> Hex2context.retrieve_docs(package_version, search_text)
      |> to_response()

    send_resp(conn, status, body)
  end

  match _ do
    send_resp(conn, 404, "Not found")
  end

  # Translates the result of `Hex2context.retrieve_docs/3` into a
  # `{status, body}` pair.
  defp to_response({:err, message}), do: {404, message}
  defp to_response({:ok, doc_chunks}), do: {200, Enum.join(doc_chunks, "\n\n")}
  defp to_response(_unexpected), do: {500, "Unable to retrieve documentation"}
end
```
```elixir
# Route requests arriving at this session's `/proxy/sessions/:id/...` path to the router.
Kino.Proxy.listen(Hex2context.API)
```
## Playground
```elixir
# Ingest two packages; versions that already have stored embeddings are skipped.
Hex2context.ingest_docs([
{"flop", "0.26.1"},
{"flop_phoenix", "0.23.1"},
])
```
```elixir
# Returns `{:ok, chunks}` with the stored chunks closest to the query text.
Hex2context.retrieve_docs("flop", "0.26.1", "filter date")
```
```elixir
# `force: true` re-fetches and re-embeds the docs even if this version was ingested before.
Hex2context.ingest_docs("geo", "4.0.1", force: true)
```
```elixir
# Query the chunks ingested for geo 4.0.1 in the previous cell.
Hex2context.retrieve_docs("geo", "4.0.1", "WKB")
```