kiln_ai.datamodel
1from __future__ import annotations 2 3import json 4from enum import Enum, IntEnum 5from typing import TYPE_CHECKING, Dict, List, Self, Type, Union 6 7import jsonschema 8import jsonschema.exceptions 9from pydantic import BaseModel, Field, model_validator 10 11from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str 12 13from .basemodel import ( 14 ID_FIELD, 15 ID_TYPE, 16 KilnBaseModel, 17 KilnParentedModel, 18 KilnParentModel, 19) 20from .json_schema import validate_schema 21 22if TYPE_CHECKING: 23 from . import Task 24 25 26__all__ = [ 27 "basemodel", 28 "json_schema", 29 "Task", 30 "Project", 31 "TaskRun", 32 "TaskOutput", 33 "TaskOutputRating", 34 "Priority", 35 "DataSource", 36 "DataSourceType", 37 "DataSourceProperty", 38 "TaskOutputRatingType", 39 "TaskRequirement", 40 "TaskDeterminism", 41] 42 43 44# Conventions: 45# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation. 46# 2) Descrptions are for Kiln users to describe/understanding the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead. 47 48# Filename compatible names 49NAME_REGEX = r"^[A-Za-z0-9 _-]+$" 50NAME_FIELD = Field(min_length=1, max_length=120, pattern=NAME_REGEX) 51SHORT_NAME_FIELD = Field(min_length=1, max_length=20, pattern=NAME_REGEX) 52 53 54class Priority(IntEnum): 55 p0 = 0 56 p1 = 1 57 p2 = 2 58 p3 = 3 59 60 61# Only one rating type for now, but this allows for extensibility if we want to add more in the future 62class TaskOutputRatingType(str, Enum): 63 five_star = "five_star" 64 custom = "custom" 65 66 67class TaskOutputRating(KilnBaseModel): 68 """ 69 A rating for a task output, including an overall rating and ratings for each requirement. 70 71 Only supports five star ratings for now, but extensible for custom values. 
72 """ 73 74 type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star) 75 value: float | None = Field( 76 description="The overall rating value (typically 1-5 stars).", 77 default=None, 78 ) 79 requirement_ratings: Dict[ID_TYPE, float] = Field( 80 default={}, 81 description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).", 82 ) 83 84 # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc) 85 def is_high_quality(self) -> bool: 86 if self.type == TaskOutputRatingType.five_star: 87 return self.value is not None and self.value >= 4 88 return False 89 90 @model_validator(mode="after") 91 def validate_rating(self) -> Self: 92 if self.type not in TaskOutputRatingType: 93 raise ValueError(f"Invalid rating type: {self.type}") 94 95 if self.type == TaskOutputRatingType.five_star: 96 if self.value is not None: 97 self._validate_five_star(self.value, "overall rating") 98 for req_id, req_rating in self.requirement_ratings.items(): 99 self._validate_five_star(req_rating, f"requirement rating for {req_id}") 100 101 return self 102 103 def _validate_five_star(self, rating: float, rating_name: str) -> None: 104 if not isinstance(rating, float) or not rating.is_integer(): 105 raise ValueError( 106 f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)" 107 ) 108 if rating < 1 or rating > 5: 109 raise ValueError( 110 f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars" 111 ) 112 113 def validate_requirement_rating_keys(self, task: Task) -> Self: 114 if len(self.requirement_ratings) == 0: 115 return self 116 117 valid_requirement_ids = {req.id for req in task.requirements} 118 for key in self.requirement_ratings.keys(): 119 if key not in valid_requirement_ids: 120 raise ValueError( 121 f"Requirement ID '{key}' is not a valid requirement ID for this task" 122 ) 123 return self 
124 125 126class TaskOutput(KilnBaseModel): 127 """ 128 An output for a specific task run. 129 """ 130 131 output: str = Field( 132 description="The output of the task. JSON formatted for structured output, plaintext for unstructured output." 133 ) 134 source: DataSource = Field( 135 description="The source of the output: human or synthetic." 136 ) 137 rating: TaskOutputRating | None = Field( 138 default=None, description="The rating of the output" 139 ) 140 141 def validate_output_format(self, task: Task) -> Self: 142 # validate output 143 if task.output_json_schema is not None: 144 try: 145 validate_schema(json.loads(self.output), task.output_json_schema) 146 except json.JSONDecodeError: 147 raise ValueError("Output is not a valid JSON object") 148 except jsonschema.exceptions.ValidationError as e: 149 raise ValueError(f"Output does not match task output schema: {e}") 150 return self 151 152 153class DataSourceType(str, Enum): 154 """ 155 The source of a piece of data. 156 """ 157 158 human = "human" 159 synthetic = "synthetic" 160 161 162class DataSourceProperty(BaseModel): 163 name: str 164 type: Type[Union[str, int, float]] 165 required_for: List[DataSourceType] = [] 166 not_allowed_for: List[DataSourceType] = [] 167 168 169class DataSource(BaseModel): 170 type: DataSourceType 171 properties: Dict[str, str | int | float] = Field( 172 default={}, 173 description="Properties describing the data source. For synthetic things like model. 
For human, the human's name.", 174 ) 175 176 _data_source_properties = [ 177 DataSourceProperty( 178 name="created_by", 179 type=str, 180 required_for=[DataSourceType.human], 181 not_allowed_for=[DataSourceType.synthetic], 182 ), 183 DataSourceProperty( 184 name="model_name", 185 type=str, 186 required_for=[DataSourceType.synthetic], 187 not_allowed_for=[DataSourceType.human], 188 ), 189 DataSourceProperty( 190 name="model_provider", 191 type=str, 192 required_for=[DataSourceType.synthetic], 193 not_allowed_for=[DataSourceType.human], 194 ), 195 DataSourceProperty( 196 name="adapter_name", 197 type=str, 198 required_for=[DataSourceType.synthetic], 199 not_allowed_for=[DataSourceType.human], 200 ), 201 DataSourceProperty( 202 name="prompt_builder_name", 203 type=str, 204 not_allowed_for=[DataSourceType.human], 205 ), 206 ] 207 208 @model_validator(mode="after") 209 def validate_type(self) -> "DataSource": 210 if self.type not in DataSourceType: 211 raise ValueError(f"Invalid data source type: {self.type}") 212 return self 213 214 @model_validator(mode="after") 215 def validate_properties(self) -> "DataSource": 216 for prop in self._data_source_properties: 217 # Check the property type is correct 218 if prop.name in self.properties: 219 if not isinstance(self.properties[prop.name], prop.type): 220 raise ValueError( 221 f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source" 222 ) 223 # Check the property is required for the data source type 224 if self.type in prop.required_for: 225 if prop.name not in self.properties: 226 raise ValueError( 227 f"'{prop.name}' is required for {self.type} data source" 228 ) 229 # Check the property is not allowed for the data source type 230 elif self.type in prop.not_allowed_for and prop.name in self.properties: 231 raise ValueError( 232 f"'{prop.name}' is not allowed for {self.type} data source" 233 ) 234 return self 235 236 @model_validator(mode="after") 237 def validate_no_empty_properties(self) -> Self: 
238 for prop, value in self.properties.items(): 239 if isinstance(value, str) and value == "": 240 raise ValueError( 241 f"Property '{prop}' must be a non-empty string for {self.type} data source" 242 ) 243 return self 244 245 246class TaskRun(KilnParentedModel): 247 """ 248 An run of a specific Task, including the input and output. 249 """ 250 251 input: str = Field( 252 description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input." 253 ) 254 input_source: DataSource = Field( 255 description="The source of the input: human or synthetic." 256 ) 257 258 output: TaskOutput = Field(description="The output of the task run.") 259 repair_instructions: str | None = Field( 260 default=None, 261 description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.", 262 ) 263 repaired_output: TaskOutput | None = Field( 264 default=None, 265 description="An version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.", 266 ) 267 268 def parent_task(self) -> Task | None: 269 if not isinstance(self.parent, Task): 270 return None 271 return self.parent 272 273 @model_validator(mode="after") 274 def validate_input_format(self) -> Self: 275 task = self.parent_task() 276 if task is None: 277 # don't validate this relationship until we have a path or parent. 
Give them time to build it (but will catch it before saving) 278 return self 279 280 # validate output 281 if task.input_json_schema is not None: 282 try: 283 validate_schema(json.loads(self.input), task.input_json_schema) 284 except json.JSONDecodeError: 285 raise ValueError("Input is not a valid JSON object") 286 except jsonschema.exceptions.ValidationError as e: 287 raise ValueError(f"Input does not match task input schema: {e}") 288 return self 289 290 @model_validator(mode="after") 291 def validate_output_format(self) -> Self: 292 task = self.parent_task() 293 if task is None: 294 return self 295 296 self.output.validate_output_format(task) 297 return self 298 299 @model_validator(mode="after") 300 def validate_requirement_ratings(self) -> Self: 301 task = self.parent_task() 302 if task is None: 303 return self 304 305 if self.output.rating is not None: 306 self.output.rating.validate_requirement_rating_keys(task) 307 if self.repaired_output is not None and self.repaired_output.rating is not None: 308 self.repaired_output.rating.validate_requirement_rating_keys(task) 309 310 return self 311 312 @model_validator(mode="after") 313 def validate_repaired_output(self) -> Self: 314 if self.repaired_output is not None: 315 if self.repaired_output.rating is not None: 316 raise ValueError( 317 "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed." 318 ) 319 if self.repair_instructions is None and self.repaired_output is not None: 320 raise ValueError( 321 "Repair instructions are required if providing a repaired output." 322 ) 323 if self.repair_instructions is not None and self.repaired_output is None: 324 raise ValueError( 325 "A repaired output is required if providing repair instructions." 
326 ) 327 return self 328 329 330class TaskRequirement(BaseModel): 331 id: ID_TYPE = ID_FIELD 332 name: str = SHORT_NAME_FIELD 333 description: str | None = Field(default=None) 334 instruction: str = Field(min_length=1) 335 priority: Priority = Field(default=Priority.p2) 336 337 338class TaskDeterminism(str, Enum): 339 deterministic = "deterministic" # Expect exact match 340 semantic_match = "semantic_match" # Expect same meaning, but flexible on expression of the meaning 341 flexible = "flexible" # Flexible on semantic output. Eval should be custom based on parsing requirements. 342 343 344class Task( 345 KilnParentedModel, 346 KilnParentModel, 347 parent_of={"runs": TaskRun}, 348): 349 name: str = NAME_FIELD 350 description: str = Field(default="") 351 priority: Priority = Field(default=Priority.p2) 352 determinism: TaskDeterminism = Field(default=TaskDeterminism.flexible) 353 instruction: str = Field(min_length=1) 354 requirements: List[TaskRequirement] = Field(default=[]) 355 # TODO: make this required, or formalize the default message output schema 356 output_json_schema: JsonObjectSchema | None = None 357 input_json_schema: JsonObjectSchema | None = None 358 359 def output_schema(self) -> Dict | None: 360 if self.output_json_schema is None: 361 return None 362 return schema_from_json_str(self.output_json_schema) 363 364 def input_schema(self) -> Dict | None: 365 if self.input_json_schema is None: 366 return None 367 return schema_from_json_str(self.input_json_schema) 368 369 # Needed for typechecking. TODO P2: fix this in KilnParentModel 370 def runs(self) -> list[TaskRun]: 371 return super().runs() # type: ignore 372 373 374class Project(KilnParentModel, parent_of={"tasks": Task}): 375 name: str = NAME_FIELD 376 description: str | None = Field( 377 default=None, 378 description="A description of the project for you and your team. Will not be used in prompts/training/validation.", 379 ) 380 381 # Needed for typechecking. 
TODO P2: fix this in KilnParentModel 382 def tasks(self) -> list[Task]: 383 return super().tasks() # type: ignore
# A task definition; declared as the parent of TaskRun children under "runs".
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={"runs": TaskRun},
):
    # Filename-safe display name.
    name: str = NAME_FIELD
    description: str = Field(default="")
    priority: Priority = Field(default=Priority.p2)
    determinism: TaskDeterminism = Field(default=TaskDeterminism.flexible)
    # Must be a non-empty string.
    instruction: str = Field(min_length=1)
    requirements: List[TaskRequirement] = Field(default=[])
    # TODO: make this required, or formalize the default message output schema
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None

    def output_schema(self) -> Dict | None:
        """Return the output schema converted by schema_from_json_str, or None when unset."""
        raw_schema = self.output_json_schema
        return None if raw_schema is None else schema_from_json_str(raw_schema)

    def input_schema(self) -> Dict | None:
        """Return the input schema converted by schema_from_json_str, or None when unset."""
        raw_schema = self.input_json_schema
        return None if raw_schema is None else schema_from_json_str(raw_schema)

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self) -> list[TaskRun]:
        child_runs = super().runs()  # type: ignore
        return child_runs
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/
A base class for creating Pydantic models.
Attributes: __class_vars__: The names of classvars defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The signature for instantiating the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a `RootModel`.
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: An instance attribute with the values of extra fields from validation when
`model_config['extra'] == 'allow'`.
__pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
__pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """Set up private attributes, then run the user-defined ``model_post_init``.

    Both steps receive the same ``context`` argument.
    """
    # Private attributes are initialised first, before the user hook runs.
    init_private_attributes(self, context)
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.
Inherited Members
# Top-level container; declared as the parent of Task children under "tasks".
class Project(KilnParentModel, parent_of={"tasks": Task}):
    # Filename-safe display name.
    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        child_tasks = super().tasks()  # type: ignore
        return child_tasks
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/
A base class for creating Pydantic models.
Attributes: __class_vars__: The names of classvars defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The signature for instantiating the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a `RootModel`.
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: An instance attribute with the values of extra fields from validation when
`model_config['extra'] == 'allow'`.
__pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
__pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
class TaskRun(KilnParentedModel):
    """
    A run of a specific Task, including the input and output.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource = Field(
        description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="An version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )

    def parent_task(self) -> Task | None:
        """Return `self.parent` when it is a Task, otherwise None."""
        if not isinstance(self.parent, Task):
            return None
        return self.parent

    @model_validator(mode="after")
    def validate_input_format(self) -> Self:
        """
        Validate `input` against the parent task's input JSON schema, when both exist.

        Raises:
            ValueError: if the input is not parseable JSON, or fails schema
                validation. The underlying error is chained as `__cause__`.
        """
        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError as e:
                raise ValueError("Input is not a valid JSON object") from e
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}") from e
        return self

    @model_validator(mode="after")
    def validate_output_format(self) -> Self:
        """Validate `output` against the parent task's output schema; no-op without a parent task."""
        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        return self

    @model_validator(mode="after")
    def validate_requirement_ratings(self) -> Self:
        """Check requirement-rating keys of the output (and repaired output) against the parent task."""
        task = self.parent_task()
        if task is None:
            return self

        if self.output.rating is not None:
            self.output.rating.validate_requirement_rating_keys(task)
        if self.repaired_output is not None and self.repaired_output.rating is not None:
            self.repaired_output.rating.validate_requirement_rating_keys(task)

        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        """
        Enforce that a repaired output is unrated and always paired with repair instructions.

        Raises:
            ValueError: if the repaired output has a rating, or only one of
                `repair_instructions` / `repaired_output` is provided.
        """
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
            if self.repair_instructions is None:
                raise ValueError(
                    "Repair instructions are required if providing a repaired output."
                )
        elif self.repair_instructions is not None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self
A run of a specific Task, including the input and output.
@model_validator(mode="after")
def validate_input_format(self) -> Self:
    """
    Validate `input` against the parent task's input JSON schema, when both exist.

    Returns:
        This instance, unchanged.

    Raises:
        ValueError: if the input is not parseable JSON, or fails schema
            validation. The underlying error is chained as `__cause__`.
    """
    task = self.parent_task()
    if task is None:
        # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
        return self

    # validate input
    if task.input_json_schema is not None:
        try:
            validate_schema(json.loads(self.input), task.input_json_schema)
        except json.JSONDecodeError as e:
            raise ValueError("Input is not a valid JSON object") from e
        except jsonschema.exceptions.ValidationError as e:
            raise ValueError(f"Input does not match task input schema: {e}") from e
    return self
@model_validator(mode="after")
def validate_requirement_ratings(self) -> Self:
    """
    Check requirement-rating keys of the output and the repaired output.

    Does nothing when there is no parent task. Each rating that is present is
    checked via its own `validate_requirement_rating_keys(task)`.
    """
    task = self.parent_task()
    if task is None:
        return self

    # Collect the ratings in the order they must be checked: output first.
    ratings = [self.output.rating]
    if self.repaired_output is not None:
        ratings.append(self.repaired_output.rating)

    for rating in ratings:
        if rating is not None:
            rating.validate_requirement_rating_keys(task)

    return self
@model_validator(mode="after")
def validate_repaired_output(self) -> Self:
    """
    Enforce that a repaired output is unrated and paired with repair instructions.

    Raises:
        ValueError: if the repaired output carries a rating, or if only one of
            `repair_instructions` / `repaired_output` is set.
    """
    repaired = self.repaired_output
    instructions = self.repair_instructions

    if repaired is not None and repaired.rating is not None:
        raise ValueError(
            "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
        )
    if instructions is None and repaired is not None:
        raise ValueError(
            "Repair instructions are required if providing a repaired output."
        )
    if instructions is not None and repaired is None:
        raise ValueError(
            "A repaired output is required if providing repair instructions."
        )
    return self
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """Set up private attributes, then run the user-defined ``model_post_init``.

    Both steps receive the same ``context`` argument.
    """
    # Private attributes are initialised first, before the user hook runs.
    init_private_attributes(self, context)
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.
Inherited Members
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource = Field(
        description="The source of the output: human or synthetic."
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: Task) -> Self:
        """
        Validate `output` against the task's output JSON schema, when the task has one.

        Args:
            task: The task whose `output_json_schema` is checked against.

        Returns:
            This instance, unchanged.

        Raises:
            ValueError: if the output is not parseable JSON, or fails schema
                validation. The underlying error is chained as `__cause__`.
        """
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError as e:
                raise ValueError("Output is not a valid JSON object") from e
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}") from e
        return self
An output for a specific task run.
def validate_output_format(self, task: Task) -> Self:
    """
    Validate `output` against the task's output JSON schema, when the task has one.

    Args:
        task: The task whose `output_json_schema` is checked against.

    Returns:
        This instance, unchanged.

    Raises:
        ValueError: if the output is not parseable JSON, or fails schema
            validation. The underlying error is chained as `__cause__`.
    """
    schema = task.output_json_schema
    if schema is None:
        # Unstructured (plaintext) output: nothing to validate.
        return self

    try:
        # Parse first, then check; both steps stay inside the same try so the
        # set of exceptions being translated is unchanged.
        parsed_output = json.loads(self.output)
        validate_schema(parsed_output, schema)
    except json.JSONDecodeError as e:
        raise ValueError("Output is not a valid JSON object") from e
    except jsonschema.exceptions.ValidationError as e:
        raise ValueError(f"Output does not match task output schema: {e}") from e
    return self
class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Only supports five star ratings for now, but extensible for custom values.
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The overall rating value (typically 1-5 stars).",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, float] = Field(
        default={},
        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
    )

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        """Return True only for a five_star rating whose overall value is at least 4."""
        if self.type == TaskOutputRatingType.five_star:
            return self.value is not None and self.value >= 4
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        """
        Check the rating type and, for five_star ratings, every rating value.

        Raises:
            ValueError: if the type is unknown, or a five_star value is not a
                whole number between 1 and 5.
        """
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        if self.type == TaskOutputRatingType.five_star:
            if self.value is not None:
                self._validate_five_star(self.value, "overall rating")
            for req_id, req_rating in self.requirement_ratings.items():
                self._validate_five_star(req_rating, f"requirement rating for {req_id}")

        return self

    def _validate_five_star(self, rating: float, rating_name: str) -> None:
        """
        Raise ValueError unless `rating` is a float holding a whole number in [1, 5].

        Args:
            rating: The value to check.
            rating_name: Label used (capitalized) at the start of the error message.
        """
        if not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )

    def validate_requirement_rating_keys(self, task: Task) -> Self:
        """
        Check that every key of `requirement_ratings` is the id of a requirement of `task`.

        Raises:
            ValueError: for the first key that is not a requirement id of the task.
        """
        # Nothing to check; `task` is not inspected in this case.
        if not self.requirement_ratings:
            return self

        valid_requirement_ids = {req.id for req in task.requirements}
        for key in self.requirement_ratings:
            if key not in valid_requirement_ids:
                raise ValueError(
                    f"Requirement ID '{key}' is not a valid requirement ID for this task"
                )
        return self
A rating for a task output, including an overall rating and ratings for each requirement.
Only supports five star ratings for now, but extensible for custom values.
@model_validator(mode="after")
def validate_rating(self) -> Self:
    """
    Check the rating type and, for five_star ratings, every rating value.

    The overall `value` (when set) is checked first, then each entry of
    `requirement_ratings`, all through `_validate_five_star`.

    Raises:
        ValueError: if `type` is not a TaskOutputRatingType member (other
            failures come from `_validate_five_star`).
    """
    if self.type not in TaskOutputRatingType:
        raise ValueError(f"Invalid rating type: {self.type}")

    # Only the five_star scale has value rules; other types pass through.
    if self.type != TaskOutputRatingType.five_star:
        return self

    overall = self.value
    if overall is not None:
        self._validate_five_star(overall, "overall rating")

    for req_id, req_rating in self.requirement_ratings.items():
        self._validate_five_star(req_rating, f"requirement rating for {req_id}")

    return self
def validate_requirement_rating_keys(self, task: Task) -> Self:
    """
    Check that every key of `requirement_ratings` is the id of a requirement of `task`.

    Args:
        task: The task whose `requirements` supply the valid ids.

    Returns:
        This instance, unchanged.

    Raises:
        ValueError: for the first key (in dict order) that is not a
            requirement id of the task.
    """
    # Nothing to check; `task` is not inspected in this case.
    if not self.requirement_ratings:
        return self

    # Build the lookup set once so each membership test is O(1).
    valid_requirement_ids = {req.id for req in task.requirements}
    for key in self.requirement_ratings:
        if key not in valid_requirement_ids:
            raise ValueError(
                f"Requirement ID '{key}' is not a valid requirement ID for this task"
            )
    return self
# Where a piece of data came from (human or synthetic) plus descriptive properties.
class DataSource(BaseModel):
    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    # Rules checked by validate_properties, in this order.
    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        """Raise ValueError if `type` is not a member of DataSourceType."""
        if self.type in DataSourceType:
            return self
        raise ValueError(f"Invalid data source type: {self.type}")

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        """
        Apply each rule in `_data_source_properties` to `properties`.

        Raises:
            ValueError: if a known property has the wrong type, is missing while
                required for this source type, or is present while not allowed.
        """
        supplied = self.properties
        for prop in self._data_source_properties:
            is_present = prop.name in supplied

            # A supplied property must match the declared type.
            if is_present and not isinstance(supplied[prop.name], prop.type):
                raise ValueError(
                    f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                )

            if self.type in prop.required_for:
                # Required for this source type: it has to be supplied.
                if not is_present:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            elif is_present and self.type in prop.not_allowed_for:
                # Forbidden for this source type: it must be absent.
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        """Raise ValueError if any string-valued property is the empty string."""
        for prop, value in self.properties.items():
            if not isinstance(value, str):
                continue
            if value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/
A base class for creating Pydantic models.
Attributes: __class_vars__: The names of classvars defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The signature for instantiating the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a `RootModel`.
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: An instance attribute with the values of extra fields from validation when
`model_config['extra'] == 'allow'`.
__pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
__pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
@model_validator(mode="after")
def validate_properties(self) -> "DataSource":
    """
    Check `properties` against every rule in `_data_source_properties`.

    For each rule, in order: a present value must be an instance of the
    rule's type; a property required for this source type must be present;
    otherwise a property not allowed for this source type must be absent.
    The first violation raises ValueError.
    """
    for prop in self._data_source_properties:
        is_present = prop.name in self.properties

        # A supplied value must have the declared type.
        if is_present and not isinstance(self.properties[prop.name], prop.type):
            raise ValueError(
                f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
            )

        # Required properties: only absence is an error; the not-allowed
        # rule is not consulted for them.
        if self.type in prop.required_for:
            if not is_present:
                raise ValueError(
                    f"'{prop.name}' is required for {self.type} data source"
                )
            continue

        # Not required here: reject it if this source type forbids it.
        if is_present and self.type in prop.not_allowed_for:
            raise ValueError(
                f"'{prop.name}' is not allowed for {self.type} data source"
            )
    return self
@model_validator(mode="after")
def validate_no_empty_properties(self) -> Self:
    """Raise ValueError if any string-valued property is the empty string."""
    for prop, value in self.properties.items():
        # Non-string values (int/float) are never considered empty.
        is_blank_string = isinstance(value, str) and value == ""
        if is_blank_string:
            raise ValueError(
                f"Property '{prop}' must be a non-empty string for {self.type} data source"
            )
    return self
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Set up `__pydantic_private__` on an instance, acting like a BaseModel method.

    `context` is part of the signature because pydantic-core supplies it when
    invoking this function; the body does not read it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Leave the instance untouched when the private storage is already set.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    # Lazily pair each private attribute name with its default so that
    # get_default() is still called exactly once per attribute, in order.
    named_defaults = (
        (name, private_attr.get_default())
        for name, private_attr in self.__private_attributes__.items()
    )
    # Attributes whose default is PydanticUndefined are left out.
    pydantic_private = {
        name: default
        for name, default in named_defaults
        if default is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', pydantic_private)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class DataSourceType(str, Enum):
    """
    Where a piece of data originated.

    Members are also `str` instances, so they compare equal to their
    string values.
    """

    # Data attributed to a person.
    human = "human"
    # Data that was generated rather than written by a person.
    synthetic = "synthetic"
The source of a piece of data.
class DataSourceProperty(BaseModel):
    """
    One rule about a named data source property: the value type it must have
    and the data source types that require or forbid it.
    """

    # Key under which the property appears in a data source's properties.
    name: str
    # Python type (str, int or float) a supplied value is checked against.
    type: Type[Union[str, int, float]]
    # Data source types for which the property must be present.
    required_for: List[DataSourceType] = []
    # Data source types for which the property must be absent.
    not_allowed_for: List[DataSourceType] = []
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/
A base class for creating Pydantic models.
Attributes: __class_vars__: The names of classvars defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The signature for instantiating the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a `RootModel`.
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: An instance attribute with the values of extra fields from validation when
`model_config['extra'] == 'allow'`.
__pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
__pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.
class TaskRequirement(BaseModel):
    """
    A single requirement of a task, with an id, a short name, an optional
    description, an instruction and a priority.
    """

    # Identifier of the requirement; requirement ratings are keyed by it.
    id: ID_TYPE = ID_FIELD
    # Short, filename-safe name (see SHORT_NAME_FIELD for the constraints).
    name: str = SHORT_NAME_FIELD
    # Optional text for Kiln users; per the file's conventions, descriptions
    # are never used in prompts/training/validation.
    description: str | None = Field(default=None)
    # Required, non-empty instruction text for this requirement.
    instruction: str = Field(min_length=1)
    # Priority of the requirement; defaults to p2.
    priority: Priority = Field(default=Priority.p2)
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/
A base class for creating Pydantic models.
Attributes: __class_vars__: The names of classvars defined on the model. __private_attributes__: Metadata about the private attributes of the model. __signature__: The signature for instantiating the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
__args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a `RootModel`.
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: An instance attribute with the values of extra fields from validation when
`model_config['extra'] == 'allow'`.
__pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
__pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
class TaskDeterminism(str, Enum):
    """
    How strictly a task's output is expected to match.

    Members are also `str` instances, so they compare equal to their
    string values.
    """

    # Expect exact match
    deterministic = "deterministic"
    # Expect same meaning, but flexible on expression of the meaning
    semantic_match = "semantic_match"
    # Flexible on semantic output. Eval should be custom based on parsing requirements.
    flexible = "flexible"
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.