<!-- livebook:{"app_settings":{"access_type":"public","auto_shutdown_ms":3600000,"show_source":true,"slug":"controlnet"}} -->
# SD ControlNet
```elixir
# Nx and EXLA are both pulled from the same commit of the elixir-nx/nx
# repository (each from its own sparse subdirectory), so the ref is named once
# to keep the two entries in lockstep.
nx_ref = "bead39ccd703cd9243e985659b2905eea8977862"

Mix.install([
  # Bumblebee comes from a Git repository pinned to a fixed commit.
  # Alternative to the pinned ref:
  # branch: "SD_controlnet"
  {:bumblebee,
   git: "https://github.com/joelpaulkoch/bumblebee.git",
   ref: "c109b1a131f6e23d6130092df887a991068c7d3c"},
  {:axon, "~> 0.6.0", override: true},
  {:nx, github: "elixir-nx/nx", ref: nx_ref, sparse: "nx", override: true},
  {:exla, github: "elixir-nx/nx", ref: nx_ref, sparse: "exla", override: true},
  {:kino, "~> 0.12.3"},
  {:nx_image, "~> 0.1.2"},
  {:stb_image, "~> 0.6.5"}
])

# Use EXLA as the default backend for tensors in this notebook.
Nx.global_default_backend(EXLA.Backend)
```
## Create Serving
```elixir
# Base Stable Diffusion checkpoint on Hugging Face. The commented lines are
# alternative checkpoints that can be swapped in.
# repository_id = "CompVis/stable-diffusion-v1-4"
repository_id = "runwayml/stable-diffusion-v1-5"
# repository_id = "SG161222/Realistic_Vision_V1.4"

# ControlNet checkpoint (scribble conditioning by default, HED as alternative).
# controlnet_repo = "lllyasviel/sd-controlnet-hed"
controlnet_repo = "lllyasviel/sd-controlnet-scribble"

# Builds the Hugging Face reference for one subdirectory of the base checkpoint.
base_subdir = fn subdir -> {:hf, repository_id, subdir: subdir} end

# Each load is asserted with `{:ok, _}` so a failed download stops the cell.
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/clip-vit-large-patch14"})
{:ok, clip} = Bumblebee.load_model(base_subdir.("text_encoder"))
{:ok, unet} = Bumblebee.load_model(base_subdir.("unet"), architecture: :with_controlnet)
{:ok, controlnet} = Bumblebee.load_model({:hf, controlnet_repo})
{:ok, vae} = Bumblebee.load_model(base_subdir.("vae"), architecture: :decoder)
{:ok, scheduler} = Bumblebee.load_scheduler(base_subdir.("scheduler"))
{:ok, featurizer} = Bumblebee.load_featurizer(base_subdir.("feature_extractor"))
{:ok, safety_checker} = Bumblebee.load_model(base_subdir.("safety_checker"))

# Options for the serving: 20 scheduler steps, safety checking enabled, and
# compilation with EXLA for a fixed batch size and sequence length.
serving_opts = [
  num_steps: 20,
  safety_checker: safety_checker,
  safety_checker_featurizer: featurizer,
  compile: [batch_size: 1, sequence_length: 60],
  defn_options: [compiler: EXLA]
]

# The serving is reused by the generation cells further down.
serving =
  Bumblebee.Diffusion.StableDiffusionControlNet.text_to_image(
    clip,
    unet,
    vae,
    controlnet,
    tokenizer,
    scheduler,
    serving_opts
  )
```
<!-- livebook:{"branch_parent_index":0} -->
## Scribble bird
```elixir
# Render a label for this section of the app.
Kino.render(Kino.Text.new("Scribble Bird"))

# Text input holding the path of the scribble image; being the last expression,
# the input itself is displayed as the cell output.
bird_file_input =
  Kino.Input.text(
    "Bird file path",
    default: "/public-apps/scribble_bird.png"
  )
```
```elixir
# Path currently entered in the "Bird file path" input.
bird_file = Kino.Input.read(bird_file_input)

# Decode as 3-channel RGB regardless of how the file is stored. Without this,
# an RGBA or grayscale PNG yields a different channel count and the reshape
# below raises.
bird_stb = StbImage.read_file!(bird_file, channels: 3)

# The conditioning tensor must be 512x512x3. Resize only when the decoded image
# has a different geometry, so images that already fit are passed through
# untouched instead of being reinterpreted by a blind reshape.
bird_stb =
  case bird_stb.shape do
    {512, 512, 3} -> bird_stb
    _other -> StbImage.resize(bird_stb, 512, 512)
  end

bird_image = StbImage.to_nx(bird_stb)

# Scale the u8 pixel values into the continuous range 0..1.
# NOTE(review): the reshape is now shape-preserving; it is kept so the tensor
# handed to the serving goes through the same call as before (Nx.reshape
# presumably also resets the axis names set by StbImage.to_nx — confirm before
# removing it).
bird_image_tensor =
  bird_image
  |> Nx.reshape({512, 512, 3})
  |> NxImage.to_continuous(0, 1)

# Show the image that will be used as conditioning.
Kino.Image.new(bird_image)
```
```elixir
# Fixed prompts used for the bundled scribble-bird example.
bird_prompt = "a bird, high quality, detailed"
bird_negative_prompt = "ugly, low quality, blurry"

# Display both prompts. The labels use the same "label: value" format; the
# negative-prompt label previously lacked the space after the colon.
Kino.Layout.grid([
  Kino.Text.new("prompt: " <> bird_prompt),
  Kino.Text.new("negative prompt: " <> bird_negative_prompt)
])
```
```elixir
# Tell the user that generation has started; the call below blocks until done.
Kino.render(Kino.Text.new("Generate image ..."))

# Request for the serving: both prompts plus the scribble as conditioning.
bird_request = %{
  prompt: bird_prompt,
  negative_prompt: bird_negative_prompt,
  controlnet_conditioning: bird_image_tensor
}

output = Nx.Serving.run(serving, bird_request)

# Show every generated image in a two-column grid.
output.results
|> Enum.map(fn result -> Kino.Image.new(result.image) end)
|> Kino.Layout.grid(columns: 2)
```
## Your own input
```elixir
# Render a label for the upload widget.
Kino.render(Kino.Text.new("Scribble image"))

# Image input configured for a 512x512 result, padding the upload to fit.
cond_inp =
  Kino.Input.image(
    "cond_image",
    size: {512, 512},
    fit: :pad
  )
```
```elixir
# Reading the image input yields nil until the user has provided an image.
# Stop the cell with a readable message in that case instead of crashing on
# `nil.file_ref`.
cond_image =
  case Kino.Input.read(cond_inp) do
    nil -> Kino.interrupt!(:normal, "Please provide a scribble image first.")
    image -> image
  end

# Load the raw pixel bytes of the uploaded image and turn them into a
# 512x512x3 tensor (matching the `size: {512, 512}` of the input) scaled
# to the continuous range 0..1.
cond_image_tensor =
  cond_image.file_ref
  |> Kino.Input.file_path()
  |> File.read!()
  |> Nx.from_binary(:u8)
  |> Nx.reshape({512, 512, 3})
  |> NxImage.to_continuous(0, 1)

# Prompt inputs, pre-filled with the values of the bird example.
prompt_input =
  Kino.Input.text("Prompt", default: "a bird, high quality, detailed")

negative_prompt_input =
  Kino.Input.text("Negative Prompt", default: "ugly, low quality, blurry")

Kino.Layout.grid([prompt_input, negative_prompt_input])
```
```elixir
# Tell the user that generation has started; the call below blocks until done.
Kino.render(Kino.Text.new("Generate image ..."))

# Current values of the two prompt inputs.
prompt = Kino.Input.read(prompt_input)
negative_prompt = Kino.Input.read(negative_prompt_input)

# Request for the serving: the user's prompts plus their image as conditioning.
request = %{
  prompt: prompt,
  negative_prompt: negative_prompt,
  controlnet_conditioning: cond_image_tensor
}

output = Nx.Serving.run(serving, request)

# Show every generated image in a two-column grid.
output.results
|> Enum.map(fn result -> Kino.Image.new(result.image) end)
|> Kino.Layout.grid(columns: 2)
```