from woodwork.utils import camel_to_snake


class ClassNameDescriptor:
    """Descriptor to convert a class's name from camelcase to snakecase"""

    def __get__(self, instance, class_):
        # Only the owner class is consulted, so the value is the same whether
        # the attribute is read from the class or from one of its instances.
        return camel_to_snake(class_.__name__)


class LogicalTypeMetaClass(type):
    """Metaclass that makes a logical type class print as its bare class name."""

    def __repr__(cls):
        return cls.__name__


class LogicalType(metaclass=LogicalTypeMetaClass):
    """Base class for all logical types.

    Class attributes:
        type_string: snake_case form of the class name (computed on access).
        pandas_dtype: dtype string associated with the logical type.
        dtype: original name of the base-class default; kept as an alias of
            ``pandas_dtype`` for backward compatibility.
        standard_tags: set of tag strings associated with the logical type.
    """

    type_string = ClassNameDescriptor()
    # Subclasses all declare ``pandas_dtype``; the base class originally only
    # declared ``dtype``. Both names are provided so that reading
    # ``pandas_dtype`` on the base class no longer raises AttributeError.
    dtype = 'string'
    pandas_dtype = dtype
    # ``set()`` rather than ``{}``: ``{}`` is an empty dict, while every
    # subclass that overrides this attribute uses a set.
    standard_tags = set()

    def __eq__(self, other, deep=False):
        """Return True when ``other`` is an instance of this object's class.

        ``deep`` is accepted but not used by this implementation.

        NOTE: because ``__eq__`` is defined without ``__hash__``, instances of
        logical types are unhashable (the classes themselves remain hashable).
        """
        return isinstance(other, self.__class__)


class Boolean(LogicalType):
    pandas_dtype = 'boolean'


class Categorical(LogicalType):
    pandas_dtype = 'category'
    standard_tags = {'category'}

    def __init__(self, encoding=None):
        # encoding dict(str -> int)
        # user can specify the encoding to use downstream
        # NOTE: the argument is accepted but currently not stored.
        pass


class CountryCode(LogicalType):
    pandas_dtype = 'category'
    standard_tags = {'category'}


class Datetime(LogicalType):
    pandas_dtype = 'datetime64[ns]'


class Double(LogicalType):
    pandas_dtype = 'float64'
    standard_tags = {'numeric'}


class Integer(LogicalType):
    pandas_dtype = 'Int64'
    standard_tags = {'numeric'}


class EmailAddress(LogicalType):
    pandas_dtype = 'string'


class Filepath(LogicalType):
    pandas_dtype = 'string'


class FullName(LogicalType):
    pandas_dtype = 'string'


class IPAddress(LogicalType):
    pandas_dtype = 'string'


class LatLong(LogicalType):
    pandas_dtype = 'string'


class NaturalLanguage(LogicalType):
    pandas_dtype = 'string'


class Ordinal(LogicalType):
    pandas_dtype = 'category'
    standard_tags = {'category'}

    def __init__(self, ranking=None):
        # ranking can be used specify the ordering (lowest to highest)
        # NOTE: the argument is accepted but currently not stored.
        pass


class PhoneNumber(LogicalType):
    pandas_dtype = 'string'


class SubRegionCode(LogicalType):
    pandas_dtype = 'category'
    standard_tags = {'category'}


class Timedelta(LogicalType):
    pandas_dtype = 'timedelta64[ns]'


class URL(LogicalType):
    pandas_dtype = 'string'


class WholeNumber(LogicalType):
    """Represents Logical Types that contain natural numbers, including zero (0)."""
    pandas_dtype = 'Int64'
    standard_tags = {'numeric'}


class ZIPCode(LogicalType):
    pandas_dtype = 'category'
    standard_tags = {'category'}


def get_logical_types():
    '''Returns a dictionary of logical type name strings and logical type classes

    Each direct subclass of ``LogicalType`` is registered under two keys: its
    snake_case ``type_string`` and its class name.
    '''
    subclasses = LogicalType.__subclasses__()
    # Snake case strings first, then class name strings (same key order as
    # building the two dictionaries separately and merging them).
    logical_types = {ltype.type_string: ltype for ltype in subclasses}
    logical_types.update({ltype.__name__: ltype for ltype in subclasses})
    return logical_types


def str_to_logical_type(logical_str, raise_error=True):
    '''Look up a logical type class by name, ignoring case.

    Args:
        logical_str (str): snake_case type string or class name of a logical type.
        raise_error (bool): whether to raise when no logical type matches.

    Returns:
        The matching logical type class, or None when nothing matches and
        ``raise_error`` is False.

    Raises:
        ValueError: when nothing matches and ``raise_error`` is True. The
            message reports the lower-cased form of the input.
    '''
    logical_str = logical_str.lower()
    logical_types_dict = {ltype_name.lower(): ltype
                          for ltype_name, ltype in get_logical_types().items()}

    if logical_str in logical_types_dict:
        return logical_types_dict[logical_str]
    if raise_error:
        raise ValueError('String %s is not a valid logical type' % logical_str)
    return None