From ed5734ae25edadb631e9de58d1f10f9c50e18c00 Mon Sep 17 00:00:00 2001
From: Dave <dave@gray101.com>
Date: Mon, 18 Mar 2024 14:19:43 -0400
Subject: [PATCH] test/fix: OSX Test Repair (#1843)

* Test with gguf instead of ggml. Update testPrompt to match? Add a debugging line to the Dockerfile that I've found helpful recently.

* fix testPrompt slightly

* Sad Experiment: Test GH runner without metal?

* break apart CGO_LDFLAGS

* switch runner

* upstream llama.cpp disables Metal on GitHub CI!

* missed a dir from clean-tests

* CGO_LDFLAGS

* start a tmate session on test failure + set LLAMA_NO_ACCELERATE

* whisper.cpp has a metal fix

* do the exact opposite of the name of this branch, but keep it around for unrelated fixes?

* add back newlines

* add tmate to linux for testing

* update fixtures

* timeout for tmate
---
 .github/workflows/test.yml        | 12 ++++++++++--
 Dockerfile                        |  1 +
 Makefile                          | 19 +++++++++++++++----
 backend/cpp/llama/Makefile        |  5 +++++
 core/http/api_test.go             |  8 ++++----
 tests/models_fixtures/config.yaml |  4 ++--
 tests/models_fixtures/gpt4.yaml   |  2 +-
 tests/models_fixtures/gpt4_2.yaml |  2 +-
 8 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2a2cc6c8..8222508a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,9 +105,13 @@ jobs:
       - name: Test
         run: |
           GO_TAGS="stablediffusion tts" make test
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 5
 
   tests-apple:
-    runs-on: macOS-latest
+    runs-on: macOS-14
     strategy:
       matrix:
         go-version: ['1.21.x']
@@ -130,4 +134,8 @@ jobs:
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
-          CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
\ No newline at end of file
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 5
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index ebda80ba..b083690e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,6 +108,7 @@ WORKDIR /build
 
 COPY . .
 COPY .git .
+RUN echo "GO_TAGS: $GO_TAGS"
 RUN make prepare
 
 # If we are building with clblas support, we need the libraries for the builds
diff --git a/Makefile b/Makefile
index 8bbc0625..ff7ec797 100644
--- a/Makefile
+++ b/Makefile
@@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
 endif
 
 ifeq ($(OS),Darwin)
-	CGO_LDFLAGS += -lcblas -framework Accelerate
+	
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
@@ -81,6 +81,12 @@ ifeq ($(OS),Darwin)
 	# disable metal if on Darwin and any other value is explicitly passed.
 	else ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+		export LLAMA_NO_ACCELERATE=1
+	endif
+
+	ifeq ($(BUILD_TYPE),metal)
+# -lcblas was removed from CGO_LDFLAGS: it always seemed to be listed as a duplicate flag.
+		CGO_LDFLAGS += -framework Accelerate
 	endif
 endif
 
@@ -286,6 +292,11 @@ clean: ## Remove build related file
 	$(MAKE) -C backend/cpp/llama clean
 	$(MAKE) dropreplace
 
+clean-tests:
+	rm -rf test-models
+	rm -rf test-dir
+	rm -rf core/http/backend-assets
+
 ## Build:
 build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
@@ -305,10 +316,10 @@ osx-signed: build
 run: prepare ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
 
-test-models/testmodel:
+test-models/testmodel.ggml:
 	mkdir test-models
 	mkdir test-dir
-	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
+	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -320,7 +331,7 @@ prepare-test: grpcs
 	cp -rf backend-assets core/http
 	cp tests/models_fixtures/* test-models
 
-test: prepare test-models/testmodel grpcs
+test: prepare test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 8502ae2f..3d31284a 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -19,6 +19,11 @@ else ifeq ($(BUILD_TYPE),clblas)
 else ifeq ($(BUILD_TYPE),hipblas)
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
 # If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
+# But if it's OSX without metal, disable it here. OS is matched as "Darwin", the spelling the top-level Makefile tests; assumes OS carries the same `uname -s` value here - TODO confirm.
+else ifeq ($(OS),Darwin)
+	ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+	endif
 endif
 
 ifeq ($(BUILD_TYPE),sycl_f16)
diff --git a/core/http/api_test.go b/core/http/api_test.go
index b0579a19..ca69e8bf 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -666,15 +666,15 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
 		})
-		It("can generate completions", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
+		It("can generate completions via ggml", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})
 
-		It("can generate chat completions ", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
+		It("can generate chat completions via ggml", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml
index 749d1699..f61c2a7c 100644
--- a/tests/models_fixtures/config.yaml
+++ b/tests/models_fixtures/config.yaml
@@ -1,6 +1,6 @@
 - name: list1
   parameters:
-    model: testmodel
+    model: testmodel.ggml
     top_p: 80
     top_k: 0.9
     temperature: 0.1
@@ -19,7 +19,7 @@
     top_p: 80
     top_k: 0.9
     temperature: 0.1
-    model: testmodel
+    model: testmodel.ggml
   context_size: 200
   stopwords:
   - "HUMAN:"
diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml
index 652a407c..43e77586 100644
--- a/tests/models_fixtures/gpt4.yaml
+++ b/tests/models_fixtures/gpt4.yaml
@@ -1,6 +1,6 @@
 name: gpt4all
 parameters:
-  model: testmodel
+  model: testmodel.ggml
   top_p: 80
   top_k: 0.9
   temperature: 0.1
diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml
index 904693ca..8a211153 100644
--- a/tests/models_fixtures/gpt4_2.yaml
+++ b/tests/models_fixtures/gpt4_2.yaml
@@ -1,6 +1,6 @@
 name: gpt4all-2
 parameters:
-  model: testmodel
+  model: testmodel.ggml
   top_p: 80
   top_k: 0.9
   temperature: 0.1