first commit

This commit is contained in:
Waard 2025-06-11 13:26:49 +02:00
commit c5746ea7b1
6 changed files with 40371 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
.venv/
build/
*.egg-info/

46
README.md Normal file
View File

@ -0,0 +1,46 @@
# Time series forecasting energy prices
![tsf](price_actual_over_time.png "Actual energy price")
The features represent energy generation from various sources (oil/biomass/hydro).
## Setup
### Prerequisites:
- A working version of `pip` on Python 3.12 (`pyproject.toml` requires `>=3.12,<3.13`)
### Steps
1. Install `uv`
```bash
pip install uv
```
2. Create & activate virtual environment
```bash
uv venv .venv
# on Windows:
.\.venv\Scripts\activate
# on Linux/macOS:
source .venv/bin/activate
```
3. Install the dependencies
```bash
uv pip install .
```
## Get hackin'
Look in `tsforecast.ipynb`
- Play around with some of the techniques
- Seasonal decompose
- SARIMA
- XGBoost
- LSTM
- Try to beat my MAE of ~0.31 on the test set (I used an LSTM)

161
ShapTime.py Normal file
View File

@ -0,0 +1,161 @@
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
# create super-time
def supertime(Tn, data):
    """Split ``data`` into ``Tn`` consecutive, equally sized segments.

    The split runs along the first axis. When ``len(data)`` is not a
    multiple of ``Tn``, the leading ``len(data) % Tn`` samples are dropped
    so that every segment holds exactly ``len(data) // Tn`` samples.
    If ``Tn`` exceeds ``len(data)`` every segment is empty.

    Args:
        Tn: Number of segments; must be a positive integer.
        data: Any object supporting ``len()`` and slice indexing on its
            first axis (e.g. a NumPy array of any rank >= 1).

    Returns:
        A list with ``Tn`` slices of ``data``, in their original order.

    Raises:
        ValueError: If ``Tn`` is smaller than 1.
    """
    if Tn < 1:
        raise ValueError(f"Tn must be a positive integer, got {Tn!r}")
    # Floor division avoids the float round-trip of int(len(data) / Tn).
    segment_len = len(data) // Tn
    # Skip the leading samples that do not fit into a full segment.
    offset = len(data) - segment_len * Tn
    # Slice only the first axis so arrays of any rank are accepted.
    return [
        data[offset + k * segment_len : offset + (k + 1) * segment_len]
        for k in range(Tn)
    ]
def supertime_add(Tn, data):
    """Split a pandas object into ``Tn`` consecutive, equally sized row blocks.

    Rows are selected positionally with ``.iloc``. When ``len(data)`` is not
    a multiple of ``Tn``, the leading ``len(data) % Tn`` rows are dropped so
    that every block holds exactly ``len(data) // Tn`` rows.
    If ``Tn`` exceeds ``len(data)`` every block is empty.

    Args:
        Tn: Number of blocks; must be a positive integer.
        data: Object exposing ``.iloc`` row slicing (DataFrame or Series).

    Returns:
        A list with ``Tn`` row blocks of ``data``, in their original order.

    Raises:
        ValueError: If ``Tn`` is smaller than 1.
    """
    if Tn < 1:
        raise ValueError(f"Tn must be a positive integer, got {Tn!r}")
    # Floor division avoids the float round-trip of int(len(data) / Tn).
    block_len = len(data) // Tn
    # Skip the leading rows that do not fit into a full block.
    offset = len(data) - block_len * Tn
    # Row-only slicing keeps this usable for both DataFrame and Series.
    return [
        data.iloc[offset + k * block_len : offset + (k + 1) * block_len]
        for k in range(Tn)
    ]
# create ShapTime
def get_sub_set(Tn):
    """Return every subset of ``range(Tn)`` as a list of ascending index lists.

    The result starts with the empty subset. Each new element appends, in
    order, a copy of every subset built so far with that element added, so
    the result doubles in length per element.
    """
    power_set = [[]]
    for element in range(Tn):
        # Build the extended copies first, then append them in one step.
        with_element = [members + [element] for members in power_set]
        power_set += with_element
    return power_set
def ValFunction(model, interp_x, Tn):
    """Evaluate the value function for every coalition of super-time segments.

    ``interp_x`` is cut into ``Tn`` segments with ``supertime``. For each
    non-empty subset of segment indices, the member segments are
    concatenated along the first axis and passed to ``model.predict``. The
    coalition's value is the mean prediction on that input minus the mean
    prediction on the full ``interp_x`` (the baseline). The empty coalition
    has value ``0.0``.

    Args:
        model: Object exposing ``predict(samples)``.
        interp_x: Samples to explain; presumably a NumPy array with samples
            on the first axis (TODO confirm against the notebook).
        Tn: Number of super-time segments.

    Returns:
        ``(subset, val_results)`` where ``subset`` is the list produced by
        ``get_sub_set(Tn)`` and ``val_results[k]`` is the value of
        ``subset[k]``.
    """
    segments = supertime(Tn, interp_x)
    subset = get_sub_set(Tn)

    def mean_prediction(samples):
        # Sum over the sample axis in NumPy rather than with a Python-level
        # loop over prediction rows.
        return np.sum(model.predict(samples), axis=0) / len(samples)

    baseline = mean_prediction(interp_x)

    # subset[0] is the empty coalition; its value is 0.0.
    val_results = [0.0]
    for members in subset[1:]:
        if len(members) == 1:
            coalition_x = segments[members[0]]
        else:
            # One concatenation per coalition instead of re-stacking (and
            # re-copying) the growing array once per member.
            coalition_x = np.concatenate(
                [segments[m] for m in members], axis=0
            )
        val_results.append(mean_prediction(coalition_x) - baseline)
    return subset, val_results
def index(Si, subset):
    """Return the position of ``Si`` inside ``subset``.

    Args:
        Si: The item to look for (compared with ``==``).
        subset: Sequence of candidate items.

    Returns:
        The index of the last element of ``subset`` equal to ``Si``.

    Raises:
        ValueError: If no element of ``subset`` equals ``Si``.
    """
    # Scan from the end: the last match wins and we can stop at the first hit.
    for position in range(len(subset) - 1, -1, -1):
        if Si == subset[position]:
            return position
    # Explicit error instead of falling through to an unbound local.
    raise ValueError(f"{Si!r} is not in subset")
def ShapleyValues(model, interp_x, Tn):
    """Compute one Shapley value per super-time segment.

    Coalition values come from ``ValFunction``. For segment ``i`` and every
    coalition ``S`` that does not contain ``i``, the marginal contribution
    ``v(S + [i]) - v(S)`` is weighted by ``|S|! * (Tn - |S| - 1)! / Tn!``
    and the weighted contributions are summed.

    Args:
        model: Object exposing ``predict``; forwarded to ``ValFunction``.
        interp_x: Samples to explain; forwarded to ``ValFunction``.
        Tn: Number of super-time segments.

    Returns:
        A list of ``Tn`` Shapley values, ordered by segment index.
    """
    subset, val_results = ValFunction(model, interp_x, Tn)

    # Constant-time lookup of a coalition's position, replacing a linear
    # scan of `subset` for every (segment, coalition) pair.
    position_of = {tuple(members): pos for pos, members in enumerate(subset)}

    # The weight depends only on the coalition size, so compute it once per
    # size instead of once per pair.
    n_factorial = math.factorial(Tn)
    weight_for_size = [
        (math.factorial(size) * math.factorial(Tn - size - 1)) / n_factorial
        for size in range(Tn)
    ]

    shapley_values = []
    for i in range(Tn):
        contributions = []
        for pos, members in enumerate(subset):
            if i in members:
                continue
            # Coalitions are stored in ascending order, so sort after adding i.
            with_i = tuple(sorted(members + [i]))
            marginal = val_results[position_of[with_i]] - val_results[pos]
            contributions.append(weight_for_size[len(members)] * marginal)
        shapley_values.append(sum(contributions))
    return shapley_values
def trans(original):
    """Convert every element of ``original`` to a plain Python ``float``.

    Elements are read by integer index (``original[i]``). Size-1 NumPy
    arrays are unwrapped explicitly, because calling ``float()`` on an
    array with ``ndim > 0`` is deprecated in recent NumPy releases.

    Args:
        original: Indexable collection of scalars or size-1 arrays.

    Returns:
        A list of floats with the same length as ``original``.

    Raises:
        TypeError: If an element is an array holding more or fewer than
            one value.
    """
    results_exp = []
    for i in range(len(original)):
        value = original[i]
        if isinstance(value, np.ndarray) and value.ndim > 0:
            if value.size != 1:
                raise TypeError(
                    "only size-1 arrays can be converted to Python scalars"
                )
            # Extract the single element without the implicit conversion.
            value = value.item()
        results_exp.append(float(value))
    return results_exp
def TimeImportance(Tn, ST_value, time_columns):
    """Draw a horizontal bar chart of ShapTime values ranked by magnitude.

    Args:
        Tn: Number of super-time segments (rows of the ranking table).
        ST_value: ShapTime value for each segment.
        time_columns: Labels used as the row index, one per segment.

    Returns:
        The object returned by ``sns.barplot``.
    """
    ranking = pd.DataFrame(index=list(range(Tn)), columns=["ShapTime"])
    ranking["ShapTime"] = ST_value
    ranking["absolute"] = ranking["ShapTime"].abs()
    ranking.index = time_columns
    # Largest absolute contribution first.
    ranking = ranking.sort_values(by="absolute", ascending=False)

    sns.set(context="paper", style="ticks", font_scale=2)
    # NOTE(review): newer seaborn releases may warn when `palette` is given
    # without `hue` -- confirm against the installed version.
    return sns.barplot(
        x="ShapTime",
        y=ranking.index,
        data=ranking,
        orient="h",
        color="lightskyblue",
        palette="Blues_r",
    )
def TimeHeatmap(Tn, ST_value, time_columns):
    """Draw a one-row heatmap of the absolute ShapTime value per segment.

    Args:
        Tn: Number of super-time segments (rows of the value table).
        ST_value: ShapTime value for each segment.
        time_columns: Labels used as the row index, one per segment.

    Returns:
        The matplotlib axes the heatmap was drawn on.
    """
    magnitudes = pd.DataFrame(index=list(range(Tn)), columns=["ShapTime"])
    magnitudes["ShapTime"] = ST_value
    magnitudes = magnitudes.abs()
    magnitudes.index = time_columns

    sns.set(font_scale=1.3)
    _, ax = plt.subplots(figsize=(13, 1))
    # Transpose so the segments run along the horizontal axis.
    sns.heatmap(magnitudes.T, annot=False, linewidths=0, ax=ax, cmap="Blues")
    return ax

BIN
price_actual_over_time.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 504 KiB

30
pyproject.toml Normal file
View File

@ -0,0 +1,30 @@
# Project metadata.
[project]
name = "time_series_forecasting"
version = "0.1.0"
description = "A setup for time series forecasting challenge with energy consumption data."
authors = [
{ name="Tristan de Waard", email="tristan.de.waard@sogeti.com" }
]
# Only Python 3.12.x satisfies this constraint.
requires-python = ">=3.12,<3.13"
# Runtime dependencies; none of them carries a version constraint.
dependencies = [
"numpy",
"pandas",
"seaborn",
"xgboost",
"matplotlib",
"plotly",
"tensorflow-cpu",
"scikit-learn",
"statsmodels",
"keras",
"ipykernel",
"ipywidgets",
"shap",
"lime",
"pip"
]
# Build backend used when installing the project (e.g. `uv pip install .`).
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

40131
tsforecast.ipynb Normal file

File diff suppressed because one or more lines are too long