Skip to content

bootstrap

bootstrap_rags(registry)

Scan knowledge bases and external repos for LlamaIndex storage and register them as RAGProviders.

Source code in wintermute/ai/bootstrap.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def bootstrap_rags(registry: LLMRegistry) -> list[RAGProvider]:
    """Scan knowledge bases and external repos for LlamaIndex storage and register them as RAGProviders."""
    search_paths = ["./knowledge_bases", "./external_repos"]
    new_providers: list[RAGProvider] = []

    # Defaults from environment
    default_base = os.getenv("DEFAULT_RAG_PROVIDER", "bedrock")
    default_embed_provider = os.getenv("DEFAULT_EMBED_PROVIDER", "bedrock")
    default_embed_model = os.getenv(
        "DEFAULT_EMBED_MODEL", "amazon.titan-embed-text-v2:0"
    )
    qdrant_api_key = os.getenv("QDRANT_API_KEY", "")

    for path_str in search_paths:
        base_path = Path(path_str)
        if not base_path.exists():
            continue

        for folder in base_path.iterdir():
            if not folder.is_dir():
                continue

            config_file = folder / "rag_config.json"
            storage_db = folder / "storage_db"

            # Parse config first to determine vector store type
            config: dict[str, Any] = {}
            if config_file.exists():
                try:
                    with open(config_file, "r") as f:
                        config = json.load(f)
                except Exception as e:
                    log.error(f"Failed to load config for rag-{folder.name}: {e}")
                    continue

            vector_store_type = config.get("vector_store_type", "local")
            has_local_storage = storage_db.exists() and storage_db.is_dir()
            has_qdrant_config = vector_store_type == "qdrant"

            if not has_local_storage and not has_qdrant_config:
                continue  # skip — no usable storage

            provider_id = f"rag-{folder.name}"

            # Read config fields with backward-compat fallback
            base_id = config.get("base_provider_id", default_base)
            embed_id = config.get("embed_provider_id", default_embed_provider)
            embed_model_id = config.get(
                "embedding_model", config.get("embed_model_id", default_embed_model)
            )
            rag_description = config.get("description", "")

            # Qdrant fields
            qdrant_url = config.get("qdrant_url", "")
            db_path = config.get("db_path", "")
            qdrant_collection = config.get("qdrant_collection_name", folder.name)

            # Metadata (logged but not used for bootstrap logic)
            document_types: list[str] = config.get("document_types", [])
            created_at = config.get("created_at", "")

            log.info(
                f"Bootstrapping RAG provider: {provider_id} (Base: {base_id}, Embed: {embed_id})"
            )

            try:
                # Resolve dependencies
                try:
                    base_provider = registry.get(base_id)
                except KeyError:
                    log.error(
                        f"Base provider '{base_id}' not found for RAG {provider_id}"
                    )
                    continue

                embed_provider = None
                try:
                    embed_provider = registry.get(embed_id)
                except KeyError:
                    log.warning(
                        f"Embedding provider '{embed_id}' not found. LlamaIndex may fail if embeddings are needed."
                    )

                # Build vector store for Qdrant KBs
                vector_store = None
                if vector_store_type == "qdrant":
                    from llama_index.vector_stores.qdrant import QdrantVectorStore
                    from qdrant_client import QdrantClient

                    if qdrant_url:
                        # Remote Qdrant server
                        qclient = QdrantClient(
                            url=qdrant_url,
                            api_key=qdrant_api_key or None,
                        )
                    elif db_path:
                        # Local on-disk Qdrant database
                        qclient = QdrantClient(path=db_path)
                    else:
                        # Local Qdrant inside the KB folder itself
                        qclient = QdrantClient(path=str(folder / "qdrant_db"))

                    vector_store = QdrantVectorStore(
                        client=qclient,
                        collection_name=qdrant_collection,
                    )
                    log.info(
                        f"Using Qdrant vector store for {provider_id}: "
                        f"url={qdrant_url or 'local'}, collection={qdrant_collection}"
                    )

                if document_types:
                    log.info(f"  Document types: {document_types}")
                if created_at:
                    log.info(f"  Created at: {created_at}")

                rag_provider = RAGProvider(
                    name=provider_id,
                    base_provider=base_provider,
                    persist_dir=str(storage_db),
                    embed_provider=embed_provider,
                    embed_model_id=embed_model_id,
                    description=rag_description,
                    vector_store=vector_store,
                )
                registry.register(rag_provider)
                new_providers.append(rag_provider)
            except Exception as e:
                log.error(f"Failed to bootstrap RAG provider {provider_id}: {e}")

    return new_providers

init_router()

Initialize and return a Router with registered LLM providers.

Source code in wintermute/ai/bootstrap.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def init_router() -> Router:
    """Initialize and return a Router with registered LLM providers."""
    # Register all providers first
    register_bedrock(region=os.getenv("AWS_REGION", "us-east-1"))
    register_groq(api_key=os.getenv("GROQ_API_KEY"))
    register_openai(api_key=os.getenv("OPENAI_API_KEY"))
    register_huggingface(as_name="local_embedder")

    # Bootstrap RAGs dynamically
    try:
        bootstrap_rags(llms)
    except Exception as e:
        log.warning(f"Could not bootstrap RAGs: {e}")

    # Make Bedrock the primary
    return Router(
        default_provider="bedrock", default_model=os.getenv("BEDROCK_MODEL_ID")
    )