Parse https://ollama.com/library/ syntax (#11480)

People search for ollama models using the web UI; this change allows one to copy the URL from the browser and have it be compatible with llama-run. Signed-off-by: Eric Curtin <ecurtin@redhat.com>
sync : ggml
2026-07-04 11:37:41 +02:00 · 2025-01-29 11:23:10 +00:00 · 2025-01-29 11:25:29 +02:00 · 2025-01-29 11:24:53 +02:00 · 2025-01-29 11:24:51 +02:00
4 changed files with 26 additions and 30 deletions
@@ -674,36 +674,27 @@ class LlamaData {
    }

    int github_dl(const std::string & model, const std::string & bn) {
-        std::string repository = model;
-        std::string branch     = "main";
-        size_t      at_pos     = model.find('@');
+        std::string  repository = model;
+        std::string  branch     = "main";
+        const size_t at_pos     = model.find('@');
        if (at_pos != std::string::npos) {
            repository = model.substr(0, at_pos);
            branch     = model.substr(at_pos + 1);
        }

-        std::vector<std::string> repo_parts;
-        size_t                   start = 0;
-        for (size_t end = 0; (end = repository.find('/', start)) != std::string::npos; start = end + 1) {
-            repo_parts.push_back(repository.substr(start, end - start));
-        }
-
-        repo_parts.push_back(repository.substr(start));
+        const std::vector<std::string> repo_parts = string_split(repository, "/");
        if (repo_parts.size() < 3) {
            printe("Invalid GitHub repository format\n");
            return 1;
        }

-        const std::string org          = repo_parts[0];
-        const std::string project      = repo_parts[1];
-        std::string       project_path = repo_parts[2];
-        for (size_t i = 3; i < repo_parts.size(); ++i) {
-            project_path += "/" + repo_parts[i];
+        const std::string & org          = repo_parts[0];
+        const std::string & project      = repo_parts[1];
+        std::string         url          = "https://raw.githubusercontent.com/" + org + "/" + project + "/" + branch;
+        for (size_t i = 2; i < repo_parts.size(); ++i) {
+            url += "/" + repo_parts[i];
        }

-        const std::string url =
-            "https://raw.githubusercontent.com/" + org + "/" + project + "/" + branch + "/" + project_path;
-
        return download(url, bn, true);
    }

@@ -735,19 +726,20 @@ class LlamaData {
        }

        const std::string bn = basename(model_);
-        if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://")) {
+        if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://") ||
+            string_starts_with(model_, "hf.co/")) {
+            rm_until_substring(model_, "hf.co/");
            rm_until_substring(model_, "://");
            ret = huggingface_dl(model_, bn);
-        } else if (string_starts_with(model_, "hf.co/")) {
-            rm_until_substring(model_, "hf.co/");
-            ret = huggingface_dl(model_, bn);
-        } else if (string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) {
+        } else if ((string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) &&
+                   !string_starts_with(model_, "https://ollama.com/library/")) {
            ret = download(model_, bn, true);
        } else if (string_starts_with(model_, "github:") || string_starts_with(model_, "github://")) {
-            rm_until_substring(model_, "github://");
            rm_until_substring(model_, "github:");
+            rm_until_substring(model_, "://");
            ret = github_dl(model_, bn);
        } else {  // ollama:// or nothing
+            rm_until_substring(model_, "ollama.com/library/");
            rm_until_substring(model_, "://");
            ret = ollama_dl(model_, bn);
        }
@@ -1302,7 +1302,7 @@ struct ggml_threadpool {
    // these are atomic as an annotation for thread-sanitizer
    atomic_bool stop;         // Used for stopping the threadpool altogether
    atomic_bool pause;        // Used for pausing the threadpool or individual threads
-    atomic_bool abort;        // Used for aborting processing of a graph
+    atomic_int abort;         // Used for aborting processing of a graph

    struct ggml_compute_state * workers;   // per thread state
    int          n_threads_max; // number of threads in the pool
@@ -13851,14 +13851,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        /*.threadpool=*/ tp,
    };

-    for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
+    for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
        struct ggml_tensor * node = cgraph->nodes[node_n];

        ggml_compute_forward(&params, node);

        if (state->ith == 0 && cplan->abort_callback &&
                cplan->abort_callback(cplan->abort_callback_data)) {
-            tp->abort = true;
+            atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
            tp->ec    = GGML_STATUS_ABORTED;
        }

@@ -14031,7 +14031,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
        threadpool->current_chunk    = 0;
        threadpool->stop             = false;
        threadpool->pause            = tpp->paused;
-        threadpool->abort            = false;
+        threadpool->abort            = -1;
        threadpool->workers          = NULL;
        threadpool->n_threads_max    = tpp->n_threads;
        threadpool->n_threads_cur    = tpp->n_threads;
@@ -14110,7 +14110,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
        threadpool->cgraph           = cgraph;
        threadpool->cplan            = cplan;
        threadpool->current_chunk    = 0;
-        threadpool->abort            = false;
+        threadpool->abort            = -1;
        threadpool->ec               = GGML_STATUS_SUCCESS;
    }

@@ -128,6 +128,10 @@ static void ggml_print_backtrace_symbols(void) {
 #endif

 static void ggml_print_backtrace(void) {
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return;
+    }
    char attach[32];
    snprintf(attach, sizeof(attach), "attach %d", getpid());
    int pid = fork();
@@ -1 +1 @@
-d92321c0d151fe73a47d89738c7c3091ac904297
+32f0b85987396945afea2291d5f4c5862434292b
Author	SHA1	Message	Date
Eric Curtin	f0d4b29edf	Parse https://ollama.com/library/ syntax (#11480) People search for ollama models using the web UI; this change allows one to copy the URL from the browser and have it be compatible with llama-run. Signed-off-by: Eric Curtin <ecurtin@redhat.com>	2025-01-29 11:23:10 +00:00
Georgi Gerganov	815857791d	sync : ggml	2025-01-29 11:25:29 +02:00
William Tambellini	1a0e87d291	ggml : add option to not print stack on abort (ggml/1081) * Add option to not print stack on abort Add option/envvar to disable stack printing on abort. Also link some unittests with Threads to fix link errors on ubuntu/g++11. * Update ggml/src/ggml.c --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>	2025-01-29 11:24:53 +02:00
issixx	d2e518e9b4	ggml-cpu : fix ggml_graph_compute_thread did not terminate on abort. (ggml/1065) some threads kept looping and failed to terminate properly after an abort during CPU execution. Co-authored-by: issi <issi@gmail.com>	2025-01-29 11:24:51 +02:00