-
Notifications
You must be signed in to change notification settings - Fork 8
Collection: making the module provider agnostic #548
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fd18a02
364a2b5
541e311
9ee861f
9ed9ca5
aed5e24
0d6c444
97178fd
24018c2
a6fb950
26fa792
8b5427b
f0a4134
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| """extend collection table for provider agnostic support | ||
|
|
||
| Revision ID: 041 | ||
| Revises: 040 | ||
| Create Date: 2026-01-15 16:53:19.495583 | ||
|
|
||
| """ | ||
| from alembic import op | ||
| import sqlalchemy as sa | ||
| import sqlmodel.sql.sqltypes | ||
| from sqlalchemy.dialects import postgresql | ||
|
|
||
|
|
||
# revision identifiers, used by Alembic.
revision = "041"
down_revision = "040"
branch_labels = None
depends_on = None

# PostgreSQL enum type used by the `collection.provider` column.
# It is declared with create_type=False and created explicitly in upgrade()
# via provider_type.create(..., checkfirst=True).
provider_type = postgresql.ENUM(
    "openai",
    # presumably placeholders for providers that are not enabled yet:
    # aws
    # gemini
    name="providertype",
    create_type=False,
)
|
|
||
|
|
||
def upgrade():
    """Apply revision 041 to the ``collection`` table.

    Steps, in order:
      1. Create the ``providertype`` enum if it is not already present.
      2. Add ``provider`` as nullable, backfill existing rows with
         ``'openai'``, then make the column NOT NULL.
      3. Add the nullable ``name`` and ``description`` text columns.
      4. Update the comment on ``llm_service_name``.
      5. Add a unique constraint on ``(project_id, name)``.
      6. Drop the ``organization_id`` foreign key and column.
    """
    table = "collection"
    bind = op.get_bind()

    # The enum is declared with create_type=False, so create it by hand.
    provider_type.create(bind, checkfirst=True)

    # Added as nullable first so that existing rows can be backfilled
    # before the NOT NULL constraint is switched on.
    provider_column = sa.Column(
        "provider",
        provider_type,
        nullable=True,
        comment="LLM provider used for this collection",
    )
    op.add_column(table, provider_column)
    op.execute("UPDATE collection SET provider = 'openai' WHERE provider IS NULL")
    op.alter_column(table, "provider", nullable=False)

    text_columns = (
        ("name", "Name of the collection"),
        ("description", "Description of the collection"),
    )
    for column_name, column_comment in text_columns:
        op.add_column(
            table,
            sa.Column(
                column_name,
                sqlmodel.sql.sqltypes.AutoString(),
                nullable=True,
                comment=column_comment,
            ),
        )

    op.alter_column(
        table,
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service",
        existing_comment="Name of the LLM service provider",
        existing_nullable=False,
    )

    op.create_unique_constraint(
        "uq_collection_project_id_name", table, ["project_id", "name"]
    )

    # The foreign key has to go before the column it is defined on.
    organization_fk = op.f("collection_organization_id_fkey")
    op.drop_constraint(organization_fk, table, type_="foreignkey")
    op.drop_column(table, "organization_id")
|
|
||
|
|
||
def downgrade():
    """Revert revision 041 on the ``collection`` table.

    Steps, in order:
      1. Re-add ``organization_id`` as nullable, backfill it from the owning
         project, make it NOT NULL and restore its foreign key.
      2. Drop the ``(project_id, name)`` unique constraint.
      3. Restore the previous comment on ``llm_service_name``.
      4. Drop the ``description``, ``name`` and ``provider`` columns.
      5. Drop the ``providertype`` enum that upgrade() created.
    """
    # Added as nullable first so existing rows can be backfilled before the
    # NOT NULL constraint is applied.
    op.add_column(
        "collection",
        sa.Column(
            "organization_id",
            sa.INTEGER(),
            autoincrement=False,
            nullable=True,
            comment="Reference to the organization",
        ),
    )
    op.execute(
        """UPDATE collection SET organization_id = (SELECT organization_id FROM project
        WHERE project.id = collection.project_id)"""
    )
    op.alter_column("collection", "organization_id", nullable=False)
    op.create_foreign_key(
        op.f("collection_organization_id_fkey"),
        "collection",
        "organization",
        ["organization_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_constraint("uq_collection_project_id_name", "collection", type_="unique")
    op.alter_column(
        "collection",
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service provider",
        existing_comment="Name of the LLM service",
        existing_nullable=False,
    )
    op.drop_column("collection", "description")
    op.drop_column("collection", "name")
    op.drop_column("collection", "provider")

    # Dropping the column does not remove the enum type itself. Drop it
    # explicitly so the downgrade leaves no trace of this revision;
    # checkfirst=True makes this a no-op if the type is already gone.
    provider_type.drop(op.get_bind(), checkfirst=True)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,71 +1,102 @@ | ||
| from datetime import datetime | ||
| from enum import Enum | ||
| from typing import Any, Literal | ||
| from uuid import UUID, uuid4 | ||
|
|
||
| from pydantic import HttpUrl, model_validator | ||
| from sqlalchemy import UniqueConstraint | ||
| from sqlmodel import Field, Relationship, SQLModel | ||
|
|
||
| from app.core.util import now | ||
| from app.models.document import DocumentPublic | ||
|
|
||
| from .organization import Organization | ||
| from .project import Project | ||
|
|
||
|
|
||
class ProviderType(str, Enum):
    """LLM providers that a collection can be created against.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value.
    """

    openai = "openai"
    # Not enabled yet:
    # BEDROCK = "bedrock"
    # GEMINI = "gemini"
|
|
||
|
|
||
class Collection(SQLModel, table=True):
    """Database model for Collection operations.

    Each row records which LLM provider backs the collection, the identifier
    and name of the external LLM service resource, and the project the
    collection belongs to. ``name`` is unique within a project (enforced by
    ``uq_collection_project_id_name``); it is optional, as is ``description``.

    There is no ``organization_id`` column on this model: migration 041
    drops it from the ``collection`` table.
    """

    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "name",
            name="uq_collection_project_id_name",
        ),
    )

    id: UUID = Field(
        default_factory=uuid4,
        primary_key=True,
        description="Unique identifier for the collection",
        sa_column_kwargs={"comment": "Unique identifier for the collection"},
    )
    # Must be a bare Field(...): wrapping it as `(Field(...),)` turns the
    # class attribute into a one-element tuple, so the nullable/comment
    # metadata is never applied to the column.
    provider: ProviderType = Field(
        nullable=False,
        description="LLM provider used for this collection (e.g., 'openai', 'bedrock', 'gemini', etc)",
        sa_column_kwargs={"comment": "LLM provider used for this collection"},
    )
    llm_service_id: str = Field(
        nullable=False,
        description="External LLM service identifier (e.g., OpenAI vector store ID)",
        sa_column_kwargs={
            "comment": "External LLM service identifier (e.g., OpenAI vector store ID)"
        },
    )
    llm_service_name: str = Field(
        nullable=False,
        description="Name of the LLM service",
        sa_column_kwargs={"comment": "Name of the LLM service"},
    )

    # Optional descriptive fields. The columns are nullable, so the
    # annotations allow None and default to it.
    name: str | None = Field(
        default=None,
        nullable=True,
        description="Name of the collection",
        sa_column_kwargs={"comment": "Name of the collection"},
    )
    description: str | None = Field(
        default=None,
        nullable=True,
        description="Description of the collection",
        sa_column_kwargs={"comment": "Description of the collection"},
    )

    # Foreign keys
    project_id: int = Field(
        foreign_key="project.id",
        nullable=False,
        ondelete="CASCADE",
        description="Project the collection belongs to",
        sa_column_kwargs={"comment": "Reference to the project"},
    )

    # Timestamps
    inserted_at: datetime = Field(
        default_factory=now,
        description="Timestamp when the collection was created",
        sa_column_kwargs={"comment": "Timestamp when the collection was created"},
    )
    updated_at: datetime = Field(
        default_factory=now,
        description="Timestamp when the collection was updated",
        sa_column_kwargs={"comment": "Timestamp when the collection was last updated"},
    )
    # None means the collection has not been deleted.
    deleted_at: datetime | None = Field(
        default=None,
        description="Timestamp when the collection was deleted",
        sa_column_kwargs={"comment": "Timestamp when the collection was deleted"},
    )

    # Relationships
    project: Project = Relationship(back_populates="collections")
|
|
||
|
|
||
| # Request models | ||
| class DocumentOptions(SQLModel): | ||
| class CollectionOptions(SQLModel): | ||
| name: str | None = Field(default=None, description="Name of the collection") | ||
| description: str | None = Field( | ||
| default=None, description="Description of the collection" | ||
| ) | ||
| documents: list[UUID] = Field( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider making this a list of strings, so that validation does not fail when a document ID is an int or a non-UUID string. That case might arise if type enforcement is relaxed at a later point in time. |
||
| description="List of document IDs", | ||
| ) | ||
|
|
@@ -154,9 +185,9 @@ class ProviderOptions(SQLModel): | |
|
|
||
|
|
||
| class CreationRequest( | ||
| DocumentOptions, | ||
| ProviderOptions, | ||
| AssistantOptions, | ||
| CollectionOptions, | ||
| ProviderOptions, | ||
| CallbackRequest, | ||
| ): | ||
| def extract_super_type(self, cls: "CreationRequest"): | ||
|
|
@@ -181,7 +212,6 @@ class CollectionPublic(SQLModel): | |
| llm_service_id: str | ||
| llm_service_name: str | ||
| project_id: int | ||
| organization_id: int | ||
|
|
||
| inserted_at: datetime | ||
| updated_at: datetime | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: we can avoid description for self-evident keys