def __init__(
    self,
    logical_type=None,
    semantic_tags=None,
    use_standard_tags=False,
    description=None,
    origin=None,
    metadata=None,
    validate=True,
):
    """Create ColumnSchema

    Args:
        logical_type (LogicalType, optional): The column's LogicalType.
            May be given as a LogicalType class, in which case it is
            instantiated with no arguments.
        semantic_tags (str, list, set, optional): The semantic tag(s)
            specified for the column.
        use_standard_tags (boolean, optional): If True, will add standard
            semantic tags to the column based on the specified logical type
            if a logical type is defined for the column. Defaults to False.
        description (str, optional): User description of the column.
        origin (str, optional): Origin of the column (i.e. "base" or
            "engineered").
        metadata (dict[str -> json serializable], optional): Extra metadata
            provided by the user. The dictionary must contain data types
            that are JSON serializable such as string, integers, and floats.
            DataFrame and Series types are not supported.
        validate (bool, optional): Whether to perform parameter validation.
            Defaults to True.

    Raises:
        TypeError: If ``validate`` is True and the logical type is not
            registered, or the description, origin or metadata has the
            wrong type.
        ValueError: If ``use_standard_tags`` is True while ``logical_type``
            is None.
    """
    # Any falsy metadata (None, empty dict, ...) is normalized to a new dict.
    metadata = metadata or {}

    # A LogicalType class is accepted as shorthand for a default instance.
    if isclass(logical_type):
        logical_type = logical_type()

    if validate:
        if logical_type is not None:
            _validate_logical_type(logical_type)
        _validate_description(description)
        _validate_origin(origin)
        _validate_metadata(metadata)

    self._metadata = metadata
    self._description = description
    self._origin = origin
    self.logical_type = logical_type
    self.use_standard_tags = use_standard_tags

    # Computed last: _get_column_tags reads self.logical_type and
    # self.use_standard_tags, so both must already be assigned.
    self.semantic_tags = self._get_column_tags(semantic_tags, validate)
def __eq__(self, other, deep=True):
    """Return whether two column schemas describe the same column.

    Args:
        other: The schema to compare against. It must expose the same
            attributes as this object.
        deep (bool, optional): When True, the metadata is compared as well.
            Defaults to True.

    Returns:
        bool: True if every compared attribute is equal, False otherwise.
    """
    if self.use_standard_tags != other.use_standard_tags:
        return False
    if self.logical_type != other.logical_type:
        return False
    if self.semantic_tags != other.semantic_tags:
        return False
    if self.description != other.description:
        return False
    if self.origin != other.origin:
        return False
    if deep and self.metadata != other.metadata:
        return False
    return True

def __repr__(self):
    """Return a short summary listing the logical type and sorted tags."""
    msg = "<ColumnSchema"
    if self.logical_type is not None:
        msg += f" (Logical Type = {self.logical_type})"
    if self.semantic_tags:
        # Sorted so the representation does not depend on set ordering.
        msg += f" (Semantic Tags = {sorted(self.semantic_tags)})"
    msg += ">"
    return msg

def _get_column_tags(self, semantic_tags, validate):
    """Build the initial set of semantic tags for the column.

    Args:
        semantic_tags (str, list, set, optional): User-specified tag(s).
        validate (bool): Forwarded to the input conversion helper.

    Returns:
        set: The user tags, plus the logical type's standard tags when
        ``use_standard_tags`` is True.

    Raises:
        ValueError: If ``use_standard_tags`` is True but no logical type
            is set.
    """
    semantic_tags = _convert_input_to_set(
        semantic_tags,
        error_language="semantic_tags",
        validate=validate,
    )

    if self.use_standard_tags:
        if self.logical_type is None:
            raise ValueError("Cannot use standard tags when logical_type is None")
        semantic_tags = semantic_tags.union(self.logical_type.standard_tags)

    return semantic_tags

@property
def description(self):
    """Description of the column"""
    return self._description

@description.setter
def description(self, description):
    _validate_description(description)
    self._description = description

@property
def origin(self):
    """Origin of the column"""
    return self._origin

@origin.setter
def origin(self, origin):
    _validate_origin(origin)
    self._origin = origin

@property
def metadata(self):
    """Metadata of the column"""
    return self._metadata

@metadata.setter
def metadata(self, metadata):
    # Falsy input (e.g. None) resets the metadata to an empty dict.
    metadata = metadata or {}
    _validate_metadata(metadata)
    self._metadata = metadata

@property
def is_numeric(self):
    """Whether the ColumnSchema is numeric in nature"""
    return (
        self.logical_type is not None
        and "numeric" in self.logical_type.standard_tags
    )

@property
def is_categorical(self):
    """Whether the ColumnSchema is categorical in nature"""
    return (
        self.logical_type is not None
        and "category" in self.logical_type.standard_tags
    )

# NOTE: the checks below compare the exact type of the logical type, so an
# instance of a subclass of the named type does not match.

@property
def is_datetime(self):
    """Whether the ColumnSchema is a Datetime column"""
    return type(self.logical_type) == Datetime

@property
def is_latlong(self):
    """Whether the ColumnSchema is a LatLong column"""
    return type(self.logical_type) == LatLong

@property
def is_boolean(self):
    """Whether the ColumnSchema is a Boolean column"""
    ltype_class = type(self.logical_type)
    return ltype_class == Boolean or ltype_class == BooleanNullable

@property
def is_natural_language(self):
    """Whether the ColumnSchema is a Natural Language column"""
    return type(self.logical_type) == NaturalLanguage

@property
def is_unknown(self):
    """Whether the ColumnSchema is an Unknown column"""
    return type(self.logical_type) == Unknown

@property
def is_ordinal(self):
    """Whether the ColumnSchema is an Ordinal column"""
    return type(self.logical_type) == Ordinal

def _add_semantic_tags(self, new_tags, name):
    """Add the specified semantic tags to the current set of tags

    A DuplicateTagsWarning is issued for any tag that is already present.

    Args:
        new_tags (str/list/set): The new tags to add
        name (str): Name of the column to use in warning
    """
    new_tags = _convert_input_to_set(new_tags)

    duplicate_tags = sorted(self.semantic_tags.intersection(new_tags))
    if duplicate_tags:
        warnings.warn(
            DuplicateTagsWarning().get_warning_message(duplicate_tags, name),
            DuplicateTagsWarning,
        )
    self.semantic_tags = self.semantic_tags.union(new_tags)

def _remove_semantic_tags(self, tags_to_remove, name):
    """Remove the specified semantic tags from the current set of tags

    A StandardTagsChangedWarning is issued when standard tags are in use
    and one of them is being removed.

    Args:
        tags_to_remove (str/list/set): The tags to remove
        name (str): Name of the column to use in warning

    Raises:
        LookupError: If any of the tags is not present on the column.
    """
    tags_to_remove = _convert_input_to_set(tags_to_remove)

    invalid_tags = sorted(tags_to_remove.difference(self.semantic_tags))
    if invalid_tags:
        raise LookupError(
            f"Semantic tag(s) '{', '.join(invalid_tags)}' not present on column '{name}'",
        )

    if self.use_standard_tags and tags_to_remove.intersection(
        self.logical_type.standard_tags,
    ):
        warnings.warn(
            StandardTagsChangedWarning().get_warning_message(
                # Always False here: this branch requires use_standard_tags.
                not self.use_standard_tags,
                name,
            ),
            StandardTagsChangedWarning,
        )
    self.semantic_tags = self.semantic_tags.difference(tags_to_remove)

def _reset_semantic_tags(self):
    """Reset the set of semantic tags to the default values.

    The default values will be either an empty set or the standard tags,
    controlled by the use_standard_tags boolean.
    """
    new_tags = set()
    if self.use_standard_tags:
        new_tags = set(self.logical_type.standard_tags)
    self.semantic_tags = new_tags

def _set_semantic_tags(self, semantic_tags):
    """Replace current semantic tags with new values.

    If use_standard_tags is set to True, standard tags will be added as well.

    Args:
        semantic_tags (str/list/set): New semantic tag(s) to set
    """
    semantic_tags = _convert_input_to_set(semantic_tags)

    if self.use_standard_tags:
        semantic_tags = semantic_tags.union(self.logical_type.standard_tags)

    self.semantic_tags = semantic_tags

@property
def custom_tags(self):
    """The custom semantic tag(s) specified for the column.

    These are the semantic tags with the standard tags (when in use) and
    the "index" and "time_index" tags excluded.
    """
    standard_tags = set()
    if self.use_standard_tags:
        standard_tags |= self.logical_type.standard_tags
    return self.semantic_tags - standard_tags - {"index", "time_index"}
def _validate_logical_type(logical_type):
    """Check that the type of ``logical_type`` is registered in the type system.

    Raises:
        TypeError: If the type of ``logical_type`` is not among
            ``ww.type_system.registered_types``.
    """
    if type(logical_type) not in ww.type_system.registered_types:
        raise TypeError(f"logical_type {logical_type} is not a registered LogicalType.")


def _validate_description(column_description):
    """Check that the column description is None or a string.

    Raises:
        TypeError: If ``column_description`` is neither None nor a str.
    """
    if column_description is not None and not isinstance(column_description, str):
        raise TypeError("Column description must be a string")


def _validate_origin(origin):
    """Check that the column origin is None or a string.

    Raises:
        TypeError: If ``origin`` is neither None nor a str.
    """
    if origin is not None and not isinstance(origin, str):
        raise TypeError("Column origin must be a string")


def _validate_metadata(column_metadata):
    """Check that the column metadata is a dictionary.

    Raises:
        TypeError: If ``column_metadata`` is not a dict (None included).
    """
    if not isinstance(column_metadata, dict):
        raise TypeError("Column metadata must be a dictionary")