# Source code for woodwork.demo.retail


import pandas as pd

import woodwork as ww
from woodwork.logical_types import (
    Boolean,
    Categorical,
    Datetime,
    Double,
    Integer,
    NaturalLanguage
)


def load_retail(id='demo_retail_data', nrows=None, return_dataframe=False):
    """Load the demo retail dataset as a Woodwork DataTable or a pandas DataFrame.

    The data is read over HTTP with ``pd.read_csv``. The gzip-compressed CSV
    is tried first; if that read raises any exception, the uncompressed CSV
    is read instead. The installed Woodwork version is appended to both URLs
    as a ``version`` query parameter. An ``order_product_id`` column holding
    ``range(len(df))`` is inserted as the first column to serve as the index.

    Args:
        id (str, optional): Name given to the DataTable. Ignored when
            ``return_dataframe`` is True. Defaults to ``demo_retail_data``.
        nrows (int, optional): Number of rows to read from the dataset.
            If None, all rows are read. Defaults to None.
        return_dataframe (bool): If True, return the pandas DataFrame.
            If False, return a Woodwork DataTable. Defaults to False.

    Returns:
        pd.DataFrame or ww.DataTable: The demo data, including the added
        ``order_product_id`` column.
    """
    version = ww.__version__
    compressed_url = "https://api.featurelabs.com/datasets/online-retail-logs-2018-08-28.csv.gz?version=" + version
    plain_url = "https://api.featurelabs.com/datasets/online-retail-logs-2018-08-28.csv?version=" + version

    def _download(url):
        # Both attempts share the same row limit and date parsing.
        return pd.read_csv(url, nrows=nrows, parse_dates=["order_date"])

    try:
        # Prefer the gz compressed file.
        retail_df = _download(compressed_url)
    except Exception:
        # Any failure falls back to the uncompressed file; if this read
        # fails as well, its exception propagates to the caller.
        retail_df = _download(plain_url)

    # Add a unique column to use as the index.
    retail_df.insert(0, 'order_product_id', range(len(retail_df)))

    if not return_dataframe:
        column_types = {
            'order_product_id': Categorical,
            'order_id': Categorical,
            'product_id': Categorical,
            'description': NaturalLanguage,
            'quantity': Integer,
            'order_date': Datetime,
            'unit_price': Double,
            'customer_name': Categorical,
            'country': Categorical,
            'total': Double,
            'cancelled': Boolean,
        }
        return ww.DataTable(
            retail_df,
            name=id,
            index='order_product_id',
            time_index='order_date',
            logical_types=column_types,
        )

    return retail_df