Fundamental analysis

In [1]:
# Import basic libraries for manipulating data.

# Please refer to xarray.pydata.org for xarray documentation.

# xarray works optimally with N-dimensional datasets in Python
# and is well suited for financial datasets with labels "time",
# "field" and "asset". xarray data structures can also be easily
# converted to pandas dataframes.

import xarray as xr

import numpy as np
import pandas as pd

# Import quantnet libraries.

import qnt.data as qndata          # data loading and manipulation
import qnt.stepper as qnstepper    # strategy definition
import qnt.stats as qnstats        # key statistics
import qnt.graph as qngraph        # graphical tools
import qnt.forward_looking as qnfl # forward looking checking
import qnt.ta as qnta              # TA functions

# display function for fancy displaying:
from IPython.display import display
import json
import time

import datetime as dt
In [2]:
# Fetch the metadata of all available assets (tail of 5*365 days)
# and keep their identifiers for the data request below.

assets = qndata.load_assets(tail=dt.timedelta(days=5 * 365))

assets_names = [asset["id"] for asset in assets]

# Fetch the data of these assets for the same tail.

# While developing, a max_date can be passed to the call to
# restrict the data to an in-sample period, so that the system
# can later be tested on unseen data after max_date.

# A submission will be accepted only if no max_date is set,
# as submissions will be evaluated on live data on a daily basis.

data = qndata.load_data(
    assets=assets_names,
    tail=dt.timedelta(days=5 * 365),
    dims=("time", "field", "asset"),
    forward_order=True,
)
fetched chunk 1/7 3s
fetched chunk 2/7 7s
fetched chunk 3/7 9s
fetched chunk 4/7 11s
fetched chunk 5/7 13s
fetched chunk 6/7 15s
fetched chunk 7/7 17s
Data loaded 17s
In [3]:
# A buy-and-hold strategy on liquid assets allocates
# constant fractions of capital to all liquid assets.
# The weights are computed on xarray structures; only the
# resulting statistics are converted to a pandas dataframe
# for display.

# select the "is_liquid" field for every time and asset:
is_liquid = data.loc[:,"is_liquid",:]

# set and normalize weights.
# On a day without any liquid asset the division is 0/0, so the NaN
# has to be replaced on the *quotient*. xarray's sum skips NaN by
# default, so filling the denominator (as done before) had no effect.
weights = (is_liquid / is_liquid.sum('asset')).fillna(0.0)

# display statistics for B'n'H strategy

stat = qnstats.calc_stat(data, weights, slippage_factor=0.05)

display(stat.to_pandas().tail())
field equity relative_return volatility underwater max_drawdown sharpe_ratio mean_return bias instruments avg_turnover avg_holding_time
time
2024-04-15 1.651261 -0.008097 0.151958 -0.040388 -0.381171 0.135445 0.020582 1.0 1130.0 0.025970 136.170254
2024-04-16 1.646240 -0.003041 0.151963 -0.043306 -0.381171 0.133619 0.020305 1.0 1130.0 0.025976 136.170254
2024-04-17 1.641856 -0.002663 0.151959 -0.045853 -0.381171 0.119679 0.018186 1.0 1130.0 0.025975 136.170254
2024-04-18 1.639699 -0.001314 0.151961 -0.047107 -0.381171 0.115374 0.017532 1.0 1130.0 0.025971 136.308371
2024-04-19 1.639716 0.000010 0.151813 -0.047097 -0.381171 0.141324 0.021455 1.0 1130.0 0.025967 215.571693

Fundamental data

We will use some fundamental indicators to improve this strategy.

First, let's retrieve the data from the fundamental database.

You can also discover available attributes here: http://xbrlview.fasb.org/yeti/resources/yeti-gwt/Yeti.jsp (us-gaap taxonomy)

In [4]:
# we will use this function to load parsed XBRL filings
help(qndata.load_secgov_forms)
Help on function load_secgov_forms in module qnt.data.secgov:

load_secgov_forms(ciks: Union[NoneType, List[str]] = None, types: Union[NoneType, List[str]] = None, facts: Union[NoneType, List[str]] = None, skip_segment: bool = False, min_date: Union[str, datetime.date] = None, max_date: Union[str, datetime.date, NoneType] = None, tail: Union[datetime.timedelta, float, int] = None) -> Generator[dict, NoneType, NoneType]
    Load SEC Forms (Fundamental data)
    :param ciks: list of cik (you can get cik from asset id)
    :param types: list of form types: ['10-K', '10-Q', '10-K/A', '10-Q/A']
    :param facts: list of facts for extraction, for example: ['us-gaap:Goodwill']
    :param skip_segment: skip facts with segment
    :param min_date: min form date
    :param max_date: max form date
    :param tail: datetime.timedelta, tail size of data. min_date = max_date - tail
    :return: generator

In [5]:
# build dict cik -> asset (assets without a cik are left out)
ciks = {a['cik']: a for a in assets if a['cik'] is not None}

# define result array: one layer per fundamental field, each with the
# same time/asset layout as the close prices, initially filled with NaN
fundamental_fields = ['assets', 'liabilities', 'shares', 'eps']
fundamental = xr.concat(
    [data.sel(field='close')] * len(fundamental_fields),
    pd.Index(fundamental_fields, name='field')
)
fundamental[:] = np.nan


def extract_last_fact(facts, name):
    """Return the value of the latest fact called ``name``.

    :param facts: iterable of fact dicts; each one is read through its
        'name', 'period' -> 'value' and 'value' entries
    :param name: fact name to look for, e.g. 'us-gaap:Assets'
    :return: the 'value' of the matching fact with the greatest period
        value (the first one wins on ties), or None if nothing matches
    """
    matching = (fact for fact in facts if fact['name'] == name)
    # periods are compared as they come (strings, or lists of strings)
    latest = max(matching, key=lambda fact: fact['period']['value'], default=None)
    return None if latest is None else latest['value']


# layer of `fundamental` -> name of the fact that fills it
fact_by_field = {
    'eps': 'us-gaap:EarningsPerShareDiluted',
    'liabilities': 'us-gaap:Liabilities',
    'assets': 'us-gaap:Assets',
    'shares': 'us-gaap:CommonStockSharesOutstanding',
}

print_first_record = True
progress = 0
st = time.time()

for form in qndata.load_secgov_forms(
    ciks=list(ciks),  # ciks of all loaded assets that have one
    types=['10-Q'],  # only quarter reports
    facts=list(fact_by_field.values()),
    skip_segment=True,
    tail=dt.timedelta(days=5*365)
):
    facts = form['facts']

    # print the first report that carries facts, to analyze the structure
    if print_first_record and len(facts) > 0:
        print("The report example:")
        display(form)
        print_first_record = False

    # display progress every 500 forms
    progress += 1
    if progress % 500 == 0:
        print("Progress:", progress, form['date'], time.time() - st)

    # skip reports of assets that are absent from the loaded data
    asset_id = ciks[form['cik']]['id']
    if asset_id not in data.asset:
        continue

    # the report is stored at the first available time label
    # at or after the form date
    later_dates = fundamental.time.loc[form['date']:]
    if len(later_dates) < 1:
        print("wrong date", form['date'])
        continue
    date = later_dates[0].values

    # a fact missing from the report yields None from extract_last_fact
    for field_name, fact_name in fact_by_field.items():
        fundamental.loc[{'asset': asset_id, 'time': date, 'field': field_name}] \
            = extract_last_fact(facts, fact_name)

print('Loaded')
The report example:
{'schemes': {'xbrli': 'http://www.xbrl.org/2003/instance',
  'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
  'xbrll': 'http://www.xbrl.org/2003/linkbase',
  'xlink': 'http://www.w3.org/1999/xlink',
  'xbrldi': 'http://xbrl.org/2006/xbrldi',
  'us-gaap': 'http://fasb.org/us-gaap/2018-01-31',
  'dei': 'http://xbrl.sec.gov/dei/2018-01-31',
  'xml': 'http://www.w3.org/XML/1998/namespace',
  'xhtml': 'http://www.w3.org/1999/xhtml'},
 'facts': [{'name': 'us-gaap:EarningsPerShareDiluted',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'range', 'value': ['2017-12-31', '2018-03-31']},
   'unit': {'type': 'divide', 'value': ['iso4217:USD', 'xbrli:shares']},
   'decimals': 2,
   'value': 0.91,
   'segment': None},
  {'name': 'us-gaap:CommonStockSharesOutstanding',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2018-12-29'},
   'unit': {'type': 'measure', 'value': 'xbrli:shares'},
   'decimals': 'INF',
   'value': 151401668,
   'segment': None},
  {'name': 'us-gaap:Assets',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2019-03-30'},
   'unit': {'type': 'measure', 'value': 'iso4217:USD'},
   'decimals': -3,
   'value': 7144864000,
   'segment': None},
  {'name': 'us-gaap:CommonStockSharesOutstanding',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2019-03-30'},
   'unit': {'type': 'measure', 'value': 'xbrli:shares'},
   'decimals': 'INF',
   'value': 148996092,
   'segment': None},
  {'name': 'us-gaap:Assets',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2018-12-29'},
   'unit': {'type': 'measure', 'value': 'iso4217:USD'},
   'decimals': -3,
   'value': 8500527000,
   'segment': None},
  {'name': 'us-gaap:Liabilities',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2018-12-29'},
   'unit': {'type': 'measure', 'value': 'iso4217:USD'},
   'decimals': -3,
   'value': 4646583000,
   'segment': None},
  {'name': 'us-gaap:Liabilities',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'instant', 'value': '2019-03-30'},
   'unit': {'type': 'measure', 'value': 'iso4217:USD'},
   'decimals': -3,
   'value': 3443491000,
   'segment': None},
  {'name': 'us-gaap:EarningsPerShareDiluted',
   'identifier': {'schema': 'http://www.sec.gov/CIK', 'value': '0001000228'},
   'period': {'type': 'range', 'value': ['2018-12-30', '2019-03-30']},
   'unit': {'type': 'divide', 'value': ['iso4217:USD', 'xbrli:shares']},
   'decimals': 2,
   'value': 0.73,
   'segment': None}],
 'id': '0001000228-19-000030',
 'type': '10-Q',
 'cik': '1000228',
 'date': '2019-05-07',
 'name': 'HENRY SCHEIN INC',
 'url': 'https://www.sec.gov/Archives/edgar/data/1000228/000100022819000030/hsic-20190330.xml'}
Progress: 500 2021-04-21 10.690224647521973
Progress: 1000 2023-11-07 18.608234405517578
Progress: 1500 2021-01-29 26.551189422607422
Progress: 2000 2020-08-06 33.129902362823486
Progress: 2500 2021-10-28 40.88055467605591
Progress: 3000 2023-08-08 47.083353757858276
Progress: 3500 2019-09-10 52.467479944229126
Progress: 4000 2021-07-29 59.92633056640625
Progress: 4500 2020-11-03 65.30855441093445
Progress: 5000 2020-10-29 71.79057717323303
Progress: 5500 2023-10-25 78.19872331619263
Progress: 6000 2020-01-30 84.06945300102234
Progress: 6500 2019-08-02 90.60578751564026
Progress: 7000 2020-05-07 98.4757297039032
Progress: 7500 2019-08-13 104.79903531074524
Progress: 8000 2019-07-25 111.02481985092163
Progress: 8500 2023-11-06 121.12369227409363
Progress: 9000 2020-05-05 129.71595811843872
Progress: 9500 2021-11-03 138.0624599456787
Progress: 10000 2019-08-02 145.39202547073364
Progress: 10500 2021-04-29 154.1943678855896
Progress: 11000 2023-08-08 160.92602062225342
Progress: 11500 2021-07-29 169.38194465637207
Progress: 12000 2022-05-04 177.24717736244202
Progress: 12500 2019-04-30 186.90014266967773
Loaded
In [6]:
# now we will prepare some ratios

# template with one layer per ratio; every layer is overwritten below.
# NOTE: the key 'liabilites/equity' (sic) is read under this exact
# spelling when the output is built, so it must not be changed here alone.
ratio_names = ['price/earnings', 'price/equity', 'liabilites/equity', 'market_cap']
ratios = xr.concat(
    [data.sel(field='close')] * len(ratio_names),
    pd.Index(ratio_names, name='field')
)

close = data.sel(field='close')
shares = fundamental.sel(field='shares')
liabilities = fundamental.sel(field='liabilities')

# 4 x the 4-period qnta.sma of the reported eps
# (presumably an annualized eps from the last four quarterly reports - confirm)
eps_y = qnta.sma(fundamental.sel(field='eps'), 4) * 4

equity = fundamental.sel(field='assets') - liabilities
equity_per_share = equity / shares

ratios.loc[{'field': 'price/earnings'}] = close / eps_y
ratios.loc[{'field': 'price/equity'}] = close / equity_per_share
ratios.loc[{'field': 'liabilites/equity'}] = liabilities / equity
ratios.loc[{'field': 'market_cap'}] = close * shares

# carry every ratio forward in time over the entries that are NaN
ratios = ratios.ffill('time')
In [7]:
# we will use these ratios to build the output

# allowed (lower, upper) range of every ratio, both bounds exclusive
ratio_bounds = {
    'price/earnings': (4, 25),
    'price/equity': (0.7, 1.3),
    'liabilites/equity': (2, 8),
}

# start from the liquidity flag and blank out (NaN) every asset
# whose ratio falls outside its range
output = data.sel(field="is_liquid")

for ratio_name, (lower, upper) in ratio_bounds.items():
    ratio = ratios.sel(field=ratio_name)
    output = output.where((ratio > lower) & (ratio < upper))

# keep only the assets whose market_cap rank among the assets exceeds 50
# (presumably ascending ranks, i.e. the smallest caps are dropped - confirm)
output = output.where(ratios.sel(field='market_cap').rank('asset') > 50)

output = output.fillna(0)

# normalize the weights per day. On a day where no asset passes the
# filters the sum is 0 and a plain division would give 0/0 = NaN weights,
# so the denominator is masked and such days get zero weights instead.
total_weight = output.sum('asset')
output = (output / total_weight.where(total_weight > 0)).fillna(0)

# cap every single weight at 0.1
output = xr.where(output > 0.1, 0.1, output)

# display stats
stat = qnstats.calc_stat(data, output, slippage_factor=0.05)

display(stat.to_pandas().tail())

# print(output.isel(time=-1).dropna('asset').asset.values)
field equity relative_return volatility underwater max_drawdown sharpe_ratio mean_return bias instruments avg_turnover avg_holding_time
time
2024-04-15 1.098591 0.000148 0.056159 -0.060230 -0.101113 -0.140322 -0.007880 1.0 7.0 0.004118 203.000000
2024-04-16 1.097097 -0.001360 0.056165 -0.061508 -0.101113 -0.148203 -0.008324 1.0 7.0 0.004118 203.000000
2024-04-17 1.096447 -0.000592 0.056165 -0.062064 -0.101113 -0.153092 -0.008598 1.0 7.0 0.004117 203.000000
2024-04-18 1.099512 0.002795 0.056189 -0.059443 -0.101113 -0.137571 -0.007730 1.0 7.0 0.004117 203.000000
2024-04-19 1.101591 0.001891 0.056194 -0.057664 -0.101113 -0.118085 -0.006636 1.0 7.0 0.004120 199.857143

Statistics

In [8]:
def print_stat(stat):
    """Print selected key indicators of a strategy:
       - the global Sharpe ratio;
       - the global mean return, in percent;
       - the global volatility, in percent;
       - the maximum drawdown, in percent.

       Sharpe ratio, mean return and volatility are computed with a
       window equal to the full length of ``stat`` (the max simulation
       period), not on the rolling basis of 3 years.

       :param stat: statistics array with a "time" coordinate and the
           entries "relative_return" and "equity"
    """

    def last_value(series):
        # final entry of the series
        return series.to_pandas().values[-1]

    n_days = len(stat.coords["time"])
    # one window spanning the whole period
    whole_period = dict(max_periods=n_days, min_periods=n_days)

    returns = stat.loc[:, "relative_return"]
    equity = stat.loc[:, "equity"]

    sharpe_ratio = last_value(
        qnstats.calc_sharpe_ratio_annualized(returns, **whole_period))

    profit = last_value(
        qnstats.calc_mean_return_annualized(returns, **whole_period)) * 100.0

    volatility = last_value(
        qnstats.calc_volatility_annualized(returns, **whole_period)) * 100.0

    max_ddown = last_value(
        qnstats.calc_max_drawdown(qnstats.calc_underwater(equity))) * 100.0

    # the drawdown is printed with its sign flipped
    print("Sharpe Ratio         : ", format(sharpe_ratio, ".3f"))
    print("Mean Return [%]      : ", format(profit, ".3f"))
    print("Volatility [%]       : ", format(volatility, ".3f"))
    print("Maximum Drawdown [%] : ", format(-max_ddown, ".3f"))

# print the key indicators of the latest `stat` (the ratio-filtered strategy)
print_stat(stat)
Sharpe Ratio         :  0.405
Mean Return [%]      :  1.978
Volatility [%]       :  4.889
Maximum Drawdown [%] :  10.111
In [9]:
# show plot with profit and losses (equity, log scale),
# leaving out the first 252*3 rows of the statistics:
performance = stat.to_pandas().loc[:, "equity"].iloc[252 * 3:]
qngraph.make_plot_filled(
    performance.index,
    performance,
    name="PnL (Equity)",
    type="log",
)
In [10]:
# show underwater chart, leaving out the first 252*3 rows of the statistics:
UWchart = stat.to_pandas().loc[:, "underwater"].iloc[252 * 3:]
qngraph.make_plot_filled(
    UWchart.index,
    UWchart,
    color="darkred",
    name="Underwater Chart",
    range_max=0,
)
In [11]:
# show rolling Sharpe ratio on a 3-year basis,
# leaving out the first 252*3 rows of the statistics:
SRchart = stat.to_pandas().loc[:, "sharpe_ratio"].iloc[252 * 3:]
qngraph.make_plot_filled(
    SRchart.index,
    SRchart,
    color="#F442C5",
    name="Rolling SR",
)
In [12]:
# show bias chart, leaving out the first 252*3 rows of the statistics:
biaschart = stat.to_pandas().loc[:, "bias"].iloc[252 * 3:]
qngraph.make_plot_filled(
    biaschart.index,
    biaschart,
    color="#5A6351",
    name="Bias Chart",
)

Write output

In [13]:
# Last mandatory step for a submission:
# write the weights stored in `output` to file.

qndata.write_output(output)
write output: /root/fractions.nc.gz