Prepared fundamental data

Import basic libraries

In [1]:
# Please refer to xarray.pydata.org for xarray documentation.

# xarray works optimally with N-dimensional datasets in Python
# and is well suited for financial datasets with labels "time",
# "field" and "asset". xarray data structures can also be easily
# converted to pandas dataframes.

import xarray as xr
import numpy as np
import pandas as pd

# Import quantnet libraries.
import qnt.data    as qndata
import qnt.stepper as qnstepper
import qnt.stats   as qnstats
import qnt.graph   as qngraph
import qnt.forward_looking as qnfl
import datetime    as dt
import itertools
import qnt.data.secgov_indicators
from qnt.neutralization import neutralize
from qnt.data.secgov_indicators import InstantIndicatorBuilder, PeriodIndicatorBuilder

Load assets and market data

In [2]:
# Length of history to load; shared by the asset list and the market data
# so both always cover the same window.
LOOKBACK = dt.timedelta(days=5*365)

# Load the descriptions of all assets available within the lookback window
# and collect their ids.
assets = qndata.load_assets(tail=LOOKBACK)
assets_names = [i["id"] for i in assets]

# A submission will be accepted only if no max_date is set,
# as submissions will be evaluated on live data on a daily basis.
data = qndata.load_data(tail=LOOKBACK,
                        dims=("time", "field", "asset"),
                        assets=assets_names,
                        forward_order=True)

# We trade only liquid stocks: build a mask that is 1 where an asset is liquid
# and NaN everywhere else, so that multiplying weights by it removes the
# illiquid positions.
# DataArray.where() returns a new array with NaN wherever the condition is
# false. The previous approach assigned into the temporary DataFrame returned
# by to_pandas(), which only changes is_liquid if that DataFrame happens to
# share memory with it. Note that `data` itself is left untouched here.
is_liquid = data.loc[:, "is_liquid", :]
is_liquid = is_liquid.where(is_liquid == 1)
fetched chunk 1/7 4s
fetched chunk 2/7 7s
fetched chunk 3/7 9s
fetched chunk 4/7 11s
fetched chunk 5/7 13s
fetched chunk 6/7 15s
fetched chunk 7/7 17s
Data loaded 17s

Load fundamental data

We have collected and processed a large amount of fundamental data for users. One can find the list of prepared data here. Below are two ways to download prepared data.

Also, users can extract and prepare fundamental data by themselves.

In [3]:
# First way: simply name the prepared indicators you want.

# Labels of the desired fundamental indicators.
# NOTE(review): 'ivestment_short_term' looks misspelled, but it is a lookup
# key passed to the library — confirm the expected spelling before changing it.
data_lbls = [
    'assets',
    'liabilities',
    'operating_expense',
    'ivestment_short_term',
]

# Request the corresponding data for the loaded assets, passing the time
# coordinate of the market data.
fun_data1 = qnt.data.secgov_load_indicators(
    assets,
    time_coord=data.time,
    standard_indicators=data_lbls,
)
load secgov facts...
fetched chunk 1 / 23 2 s
fetched chunk 2 / 23 3 s
fetched chunk 3 / 23 4 s
fetched chunk 4 / 23 6 s
fetched chunk 5 / 23 7 s
fetched chunk 6 / 23 8 s
fetched chunk 7 / 23 10 s
fetched chunk 8 / 23 11 s
fetched chunk 9 / 23 13 s
fetched chunk 10 / 23 14 s
fetched chunk 11 / 23 15 s
fetched chunk 12 / 23 17 s
fetched chunk 13 / 23 18 s
fetched chunk 14 / 23 19 s
fetched chunk 15 / 23 20 s
fetched chunk 16 / 23 21 s
fetched chunk 17 / 23 22 s
fetched chunk 18 / 23 23 s
fetched chunk 19 / 23 25 s
fetched chunk 20 / 23 27 s
fetched chunk 21 / 23 29 s
fetched chunk 22 / 23 30 s
fetched chunk 23 / 23 30 s
facts loaded.
In [4]:
# Second way: describe each indicator yourself.

# A builder takes the indicator name and a list of the us-gaap tags
# it should be built from.

# Instant indicators are updated regularly within each report.
instant_data_list = [
    InstantIndicatorBuilder(name, tags, True)
    for name, tags in (
        ('assets', ['us-gaap:Assets']),
        ('liabilities', ['us-gaap:Liabilities']),
        ('shares', ['us-gaap:CommonStockSharesOutstanding',
                    'us-gaap:CommonStockSharesIssued']),
    )
]

# Periodical indicators correspond to a certain period, for example
# operating expenses and sales. They can be requested as a quarterly value,
# an annual value or a 'last twelve months' value: pass 'qf', 'af' or 'ltm'
# respectively as the last argument.
period_data_list = [
    PeriodIndicatorBuilder(name, tags, True, period)
    for name, tags, period in (
        ('operating_expense', ['us-gaap:OperatingExpenses'], 'qf'),
        ('sales_revenue', ['us-gaap:SalesRevenueGoodsNet',
                           'us-gaap:SalesRevenueNet',
                           'us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax'],
         'af'),
        ('sga_expense', ['us-gaap:SellingGeneralAndAdministrativeExpense'], 'ltm'),
    )
]

# When loading by standard labels (the first way), 'ltm' is used by default.

# Load data (left disabled).
# NOTE(review): as written below, a positional argument follows a keyword
# argument, which is a SyntaxError if uncommented. The builders presumably
# have to be passed through a keyword argument — confirm the parameter name
# against the qnt API before enabling this line.
# fun_data2 = qnt.data.secgov_load_indicators(assets,time_coord = data.time, period_data_list)

Weights allocation

In [5]:
# Helper for quickly comparing several candidate algorithms by Sharpe ratio.
def estimate_sharpe(weights_final):
    """Print the annualized Sharpe ratio obtained with the given weights.

    Statistics are computed by ``qnstats.calc_stat`` from the notebook-level
    ``data`` and ``weights_final`` with ``slippage_factor=0.05``. The Sharpe
    ratio window (``max_periods`` and ``min_periods``) is set to the number of
    time points in the statistics, and the last value of the resulting series
    is printed. Nothing is returned.
    """
    stats = qnstats.calc_stat(data, weights_final, slippage_factor=0.05)
    n_days = len(stats.coords["time"])

    sharpe_series = qnstats.calc_sharpe_ratio_annualized(
        stats.loc[:, "relative_return"],
        max_periods=n_days,
        min_periods=n_days,
    )
    sharpe_ratio = sharpe_series.to_pandas().values[-1]

    print(f'Sharpe ratio = {sharpe_ratio}')
In [6]:
# Idea to test: allocate more to companies with higher operating expenses.
# Operating expenses include marketing, noncapitalized R&D, travel and
# entertainment, office supply, rent, salary, cogs...
# The signal is multiplied by the liquidity mask because we trade only
# liquid stocks.
weights = fun_data1.sel(field='operating_expense') * is_liquid

# Report the Sharpe ratio of this allocation.
estimate_sharpe(weights)
Sharpe ratio = 0.46142363098870925

We can reduce risks by neutralizing weights.

This means that the net amount of funds invested in the market is zero: long and short positions offset each other. The algorithm therefore becomes independent of the general up/down trend of the market, which helps to avoid losses during market-wide downturns such as crises.

In [7]:
# First signal, made market neutral.
# Start from operating expenses restricted to liquid stocks ...
weights = fun_data1.sel(field='operating_expense') * is_liquid

# ... scale so that the absolute weights sum to one across assets ...
gross_exposure = abs(weights).sum('asset')
weights = weights / gross_exposure

# ... then neutralize over the 'market' group and flip the sign of the result.
neutral_weights = neutralize(weights, assets, group='market')
weights1 = -neutral_weights

estimate_sharpe(weights1)
Sharpe ratio = -0.1821495880402316

If the Sharpe ratio is low, sometimes it is reasonable to combine several signals in one algorithm.

In [8]:
# Second signal: short-term investments, restricted to liquid stocks.
weights = fun_data1.sel(field='ivestment_short_term') * is_liquid

# Scale so that the absolute weights sum to one across assets.
gross_exposure = abs(weights).sum('asset')
weights = weights / gross_exposure

# Neutralize over the 'market' group (no sign flip for this signal).
weights2 = neutralize(weights, assets, group='market')

estimate_sharpe(weights2)
Sharpe ratio = 0.20262251701637382
In [9]:
# Combine both signals into a single algorithm: add them up, then rescale
# so that the absolute weights sum to one across assets.
combined_signal = weights1 + weights2
output = combined_signal / abs(combined_signal).sum('asset')

estimate_sharpe(output)
Sharpe ratio = -0.44013587289377176

Statistics

In [10]:
def print_stat(stat):
    """Print key performance indicators of a strategy.

    The following values are printed, each with three decimals:
       - the global Sharpe ratio of the strategy;
       - the global mean profit, in percent;
       - the global volatility, in percent;
       - the maximum drawdown, in percent (printed with its sign flipped).

    Sharpe ratio, mean profit and volatility are computed over the
    whole simulation period (the window equals the number of time
    points in ``stat``), not on a rolling basis of 3 years.
    """

    def _latest(series):
        # Last value of a statistic series after conversion to pandas.
        return series.to_pandas().values[-1]

    n_days = len(stat.coords["time"])
    # Use the full history as the window for every rolling statistic.
    full_window = dict(max_periods=n_days, min_periods=n_days)

    returns = stat.loc[:, "relative_return"]
    equity = stat.loc[:, "equity"]

    sharpe_ratio = _latest(
        qnstats.calc_sharpe_ratio_annualized(returns, **full_window))
    profit = _latest(
        qnstats.calc_mean_return_annualized(returns, **full_window)) * 100.0
    volatility = _latest(
        qnstats.calc_volatility_annualized(returns, **full_window)) * 100.0

    underwater = qnstats.calc_underwater(equity)
    max_ddown = _latest(qnstats.calc_max_drawdown(underwater)) * 100.0

    report = (
        ("Sharpe Ratio         : ", sharpe_ratio),
        ("Mean Return [%]      : ", profit),
        ("Volatility [%]       : ", volatility),
        ("Maximum Drawdown [%] : ", -max_ddown),
    )
    for label, value in report:
        print(label, f"{value:.3f}")
In [11]:
# Compute the statistics of the combined strategy (same slippage factor as
# used in estimate_sharpe) and print the key indicators.
stat = qnstats.calc_stat(data, output, slippage_factor=0.05)
print_stat(stat)
Sharpe Ratio         :  -0.440
Mean Return [%]      :  -7.250
Volatility [%]       :  16.472
Maximum Drawdown [%] :  51.338
In [12]:
# Plot the profit-and-loss (equity) curve of the strategy,
# passing type="log" to the plotting helper.
stat_table = stat.to_pandas()
performance = stat_table["equity"].iloc[:]
qngraph.make_plot_filled(
    performance.index,
    performance,
    name="PnL (Equity)",
    type="log",
)
In [13]:
# Check correlation: pass the final weights and the market data to
# qnstats.print_correlation, which prints the result of the check.
qnstats.print_correlation(output, data)
correlation check disabled

Ok. This strategy does not correlate with other strategies.
In [14]:
# Save the final weights with qndata.write_output
# (the recorded run wrote them to /root/fractions.nc.gz).
qndata.write_output(output)
write output: /root/fractions.nc.gz