Skip to content

Added working with timeseries data notebook #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[meta]
title="Working with Timeseries data in SingleStore"
description="SingleStore offers robust timeseries analytics functions, and interpolation methods such as linear interpolation can be implemented to estimate missing values in timeseries datasets."
tags=["timeseries", "multimodelsupport"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells":[{"cell_type":"code","execution_count":16,"id":"ffd07cf0-27e4-4b16-b68f-a9a576162c73","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:46:02.555404Z","iopub.status.busy":"2023-12-14T17:46:02.555154Z","iopub.status.idle":"2023-12-14T17:46:14.523142Z","shell.execute_reply":"2023-12-14T17:46:14.522665Z","shell.execute_reply.started":"2023-12-14T17:46:02.555389Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\ndrop database time_series;"},{"attachments":{},"cell_type":"markdown","id":"2a13fd46-fc65-4feb-8561-5c23ee4b238c","metadata":{"language":"sql"},"source":"Working with Timeseries data "},{"cell_type":"code","execution_count":19,"id":"e2e6b5f4-b281-4d9b-a89e-c7bc4cbbdddd","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:46:21.583166Z","iopub.status.busy":"2023-12-14T17:46:21.582817Z","iopub.status.idle":"2023-12-14T17:46:32.436735Z","shell.execute_reply":"2023-12-14T17:46:32.436093Z","shell.execute_reply.started":"2023-12-14T17:46:21.583148Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\ncreate database time_series;\n\nuse time_series;"},{"cell_type":"code","execution_count":23,"id":"a088019c-7186-4206-9064-cce748e6f15b","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:46:51.839734Z","iopub.status.busy":"2023-12-14T17:46:51.839498Z","iopub.status.idle":"2023-12-14T17:46:52.100763Z","shell.execute_reply":"2023-12-14T17:46:52.100190Z","shell.execute_reply.started":"2023-12-14T17:46:51.839720Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n </tr>\n </thead>\n 
<tbody>\n </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":23,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nCREATE TABLE tick(\n ts datetime(6) ,\n symbol varchar(5),\n price numeric(18,4));"},{"attachments":{},"cell_type":"markdown","id":"d53ecf33-38b2-4a92-b7da-0e434bf9e64c","metadata":{"language":"python"},"source":"Inserting timeseries data into the table"},{"cell_type":"code","execution_count":27,"id":"13069021-1108-4d43-887d-913c3ad99e33","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:47:29.993695Z","iopub.status.busy":"2023-12-14T17:47:29.993424Z","iopub.status.idle":"2023-12-14T17:47:30.391754Z","shell.execute_reply":"2023-12-14T17:47:30.391157Z","shell.execute_reply.started":"2023-12-14T17:47:29.993675Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nINSERT INTO tick VALUES\n('2020-02-18 10:55:36.179760', 'ABC', 100.00),\n('2020-02-18 10:57:26.179761', 'ABC', 101.00),\n('2020-02-18 10:59:16.178763', 'ABC', 102.50),\n('2020-02-18 11:00:56.179769', 'ABC', 102.00),\n('2020-02-18 11:01:37.179769', 'ABC', 103.00),\n('2020-02-18 11:02:46.179769', 'ABC', 103.00),\n('2020-02-18 11:02:59.179769', 'ABC', 102.60),\n('2020-02-18 11:02:46.179769', 'XYZ', 103.00),\n('2020-02-18 11:02:59.179769', 'XYZ', 102.60),\n('2020-02-18 11:03:59.179769', 'XYZ', 102.50);"},{"attachments":{},"cell_type":"markdown","id":"0e099c6b-43ac-4b69-ae3c-979d28b450ae","metadata":{"language":"sql"},"source":"Query to fetch the min, max, first and last price of each symbol per hour using window functions, without any of the dedicated timeseries functions
"},{"cell_type":"code","execution_count":30,"id":"ca1117b9-e2eb-4dbd-abd2-ee6b1636d49d","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:47:48.760572Z","iopub.status.busy":"2023-12-14T17:47:48.760336Z","iopub.status.idle":"2023-12-14T17:47:49.007858Z","shell.execute_reply":"2023-12-14T17:47:49.007296Z","shell.execute_reply.started":"2023-12-14T17:47:48.760557Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n <th>ts</th>\n <th>symbol</th>\n <th>min_pr</th>\n <th>max_pr</th>\n <th>first</th>\n <th>last</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>2020-02-18 10:00:00</td>\n <td>ABC</td>\n <td>100.0000</td>\n <td>102.5000</td>\n <td>100.0000</td>\n <td>102.5000</td>\n </tr>\n <tr>\n <td>2020-02-18 11:00:00</td>\n <td>ABC</td>\n <td>102.0000</td>\n <td>103.0000</td>\n <td>102.0000</td>\n <td>102.6000</td>\n </tr>\n <tr>\n <td>2020-02-18 11:00:00</td>\n <td>XYZ</td>\n <td>102.5000</td>\n <td>103.0000</td>\n <td>103.0000</td>\n <td>102.5000</td>\n </tr>\n </tbody>\n</table>","text/plain":"+---------------------+--------+----------+----------+----------+----------+\n| ts | symbol | min_pr | max_pr | first | last |\n+---------------------+--------+----------+----------+----------+----------+\n| 2020-02-18 10:00:00 | ABC | 100.0000 | 102.5000 | 100.0000 | 102.5000 |\n| 2020-02-18 11:00:00 | ABC | 102.0000 | 103.0000 | 102.0000 | 102.6000 |\n| 2020-02-18 11:00:00 | XYZ | 102.5000 | 103.0000 | 103.0000 | 102.5000 |\n+---------------------+--------+----------+----------+----------+----------+"},"execution_count":30,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nWITH ranked AS\n(SELECT symbol,\n RANK() OVER w as r,\n MIN(price) OVER w as min_pr,\n MAX(price) OVER w as max_pr,\n FIRST_VALUE(price) OVER w as first,\n LAST_VALUE(price) OVER w as last,\n from_unixtime(unix_timestamp(ts) div (60*60) * (60*60)) as ts\n FROM tick\n WINDOW w AS (PARTITION BY symbol,\n from_unixtime(unix_timestamp(ts) div 
(60*60) * (60*60))\n ORDER BY ts\n ROWS BETWEEN UNBOUNDED PRECEDING\n AND UNBOUNDED FOLLOWING))\n\n\nSELECT ts, symbol, min_pr, max_pr, first, last\nFROM ranked\nWHERE r = 1\nORDER BY symbol, ts;\n"},{"attachments":{},"cell_type":"markdown","id":"d447a5d8-a6aa-402f-8a22-696fe1517827","metadata":{"language":"python"},"source":"Using First function\n\nSyntax : FIRST ( value [,time]);"},{"cell_type":"code","execution_count":33,"id":"f631d413-01ed-4bc1-b83a-18e4b6c8b9c2","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:48:08.043766Z","iopub.status.busy":"2023-12-14T17:48:08.043573Z","iopub.status.idle":"2023-12-14T17:48:08.232436Z","shell.execute_reply":"2023-12-14T17:48:08.231883Z","shell.execute_reply.started":"2023-12-14T17:48:08.043752Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n <th>first(price,ts)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>100.0000</td>\n </tr>\n </tbody>\n</table>","text/plain":"+-----------------+\n| first(price,ts) |\n+-----------------+\n| 100.0000 |\n+-----------------+"},"execution_count":33,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nSELECT first(price,ts) FROM tick;"},{"attachments":{},"cell_type":"markdown","id":"b5c5c8a7-c019-4293-af93-4948320216f3","metadata":{"language":"python"},"source":"Using Last function \n\nSyntax : LAST ( value [,time]);"},{"cell_type":"code","execution_count":34,"id":"ec5461a0-ba9e-4c03-aedb-06b5a1ee9ab2","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:48:15.896418Z","iopub.status.busy":"2023-12-14T17:48:15.896174Z","iopub.status.idle":"2023-12-14T17:48:16.095107Z","shell.execute_reply":"2023-12-14T17:48:16.094531Z","shell.execute_reply.started":"2023-12-14T17:48:15.896401Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n <th>last(price,ts)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>102.5000</td>\n </tr>\n </tbody>\n</table>","text/plain":"+----------------+\n| 
last(price,ts) |\n+----------------+\n| 102.5000 |\n+----------------+"},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nSELECT last(price,ts) from tick;\n"},{"attachments":{},"cell_type":"markdown","id":"64531ae4-c41f-4138-872b-1a184a3f26ea","metadata":{"language":"sql"},"source":"Time Bucket function : \n\nSyntax : TIME_BUCKET( bucket_width [,time [,origin]]);\n\nThe time bucket function organizes a timeseries column into specified intervals, allowing for targeted operations within each interval."},{"cell_type":"code","execution_count":36,"id":"f8969da2-cf14-4eb2-b09b-e50e58fd7443","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:48:20.545089Z","iopub.status.busy":"2023-12-14T17:48:20.544817Z","iopub.status.idle":"2023-12-14T17:48:20.832538Z","shell.execute_reply":"2023-12-14T17:48:20.831948Z","shell.execute_reply.started":"2023-12-14T17:48:20.545073Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n <thead>\n <tr>\n <th>ts</th>\n <th>symbol</th>\n <th>min_pr</th>\n <th>max_pr</th>\n <th>first</th>\n <th>last</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>2020-02-18 10:54:00</td>\n <td>ABC</td>\n <td>100.0000</td>\n <td>100.0000</td>\n <td>100.0000</td>\n <td>100.0000</td>\n </tr>\n <tr>\n <td>2020-02-18 10:56:00</td>\n <td>ABC</td>\n <td>101.0000</td>\n <td>101.0000</td>\n <td>101.0000</td>\n <td>101.0000</td>\n </tr>\n <tr>\n <td>2020-02-18 10:58:00</td>\n <td>ABC</td>\n <td>102.5000</td>\n <td>102.5000</td>\n <td>102.5000</td>\n <td>102.5000</td>\n </tr>\n <tr>\n <td>2020-02-18 11:00:00</td>\n <td>ABC</td>\n <td>102.0000</td>\n <td>103.0000</td>\n <td>102.0000</td>\n <td>103.0000</td>\n </tr>\n <tr>\n <td>2020-02-18 11:02:00</td>\n <td>ABC</td>\n <td>102.6000</td>\n <td>103.0000</td>\n <td>103.0000</td>\n <td>102.6000</td>\n </tr>\n <tr>\n <td>2020-02-18 11:02:00</td>\n <td>XYZ</td>\n <td>102.5000</td>\n <td>103.0000</td>\n <td>103.0000</td>\n <td>102.5000</td>\n </tr>\n 
</tbody>\n</table>","text/plain":"+---------------------+--------+----------+----------+----------+----------+\n| ts | symbol | min_pr | max_pr | first | last |\n+---------------------+--------+----------+----------+----------+----------+\n| 2020-02-18 10:54:00 | ABC | 100.0000 | 100.0000 | 100.0000 | 100.0000 |\n| 2020-02-18 10:56:00 | ABC | 101.0000 | 101.0000 | 101.0000 | 101.0000 |\n| 2020-02-18 10:58:00 | ABC | 102.5000 | 102.5000 | 102.5000 | 102.5000 |\n| 2020-02-18 11:00:00 | ABC | 102.0000 | 103.0000 | 102.0000 | 103.0000 |\n| 2020-02-18 11:02:00 | ABC | 102.6000 | 103.0000 | 103.0000 | 102.6000 |\n| 2020-02-18 11:02:00 | XYZ | 102.5000 | 103.0000 | 103.0000 | 102.5000 |\n+---------------------+--------+----------+----------+----------+----------+"},"execution_count":36,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nSELECT time_bucket('2m',ts) as ts, symbol, min(price) as min_pr,\n max(price) as max_pr, first(price,ts) as first, last(price,ts) as last\nFROM tick\ngroup by 2, 1\norder by 2, 1;"},{"cell_type":"code","execution_count":null,"id":"1a7fc5af-7256-4ef5-9eb7-2e721ef1a84b","metadata":{"language":"sql","trusted":true},"outputs":[],"source":"%%sql\ndrop table tick;"},{"attachments":{},"cell_type":"markdown","id":"64b1187f-0607-4c10-a203-e9934bef428a","metadata":{"language":"python"},"source":"Interpolation in Singlestore "},{"cell_type":"code","execution_count":38,"id":"4354147e-6876-4d36-8c9b-6ef7d0c9ceee","metadata":{"execution":{"iopub.execute_input":"2023-12-14T17:48:28.046966Z","iopub.status.busy":"2023-12-14T17:48:28.046706Z","iopub.status.idle":"2023-12-14T17:48:28.051005Z","shell.execute_reply":"2023-12-14T17:48:28.050370Z","shell.execute_reply.started":"2023-12-14T17:48:28.046942Z"},"language":"python","trusted":true},"outputs":[{"ename":"SyntaxError","evalue":"invalid syntax (3209436851.py, line 1)","output_type":"error","traceback":["\u001b[0;36m Cell \u001b[0;32mIn[38], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m CREATE 
TABLE tick (ts datetime(6), symbol varchar(5),\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"]}],"source":"CREATE TABLE tick (ts datetime(6), symbol varchar(5),\n price numeric(18,4));\n"},{"attachments":{},"cell_type":"markdown","id":"25533597-2247-4bab-91de-3f4f637f419c","metadata":{"language":"python"},"source":"We have a table named tick; let's assume that a row is inserted into it every second."},{"cell_type":"code","execution_count":null,"id":"7488e107-1a60-4d58-990f-24ce801980f5","metadata":{"language":"python","trusted":true},"outputs":[],"source":"INSERT INTO tick VALUES\n ('2019-02-18 10:55:36.000000', 'ABC', 100.00),\n ('2019-02-18 10:55:37.000000', 'ABC', 102.00),\n ('2019-02-18 10:55:40.000000', 'ABC', 103.00),\n ('2019-02-18 10:55:42.000000', 'ABC', 104.00);\n\nselect * from tick;\n"},{"attachments":{},"cell_type":"markdown","id":"882dd87a-7133-4dd9-a35f-bf2f9e935f57","metadata":{"language":"python"},"source":"As observed in the tick table, the values for the 38th, 39th and 41st seconds are missing.\nThis is a common issue observed in any timeseries data."},{"cell_type":"code","execution_count":null,"id":"3b0ba0ed-6c2b-4b7b-95b0-003e432e06ce","metadata":{"language":"python","trusted":true},"outputs":[],"source":"Below are the procedures that implement linear interpolation.\n\nThe first one, driver(), retrieves data from a table named tick and then calls another procedure named interpolate_ts() to perform an interpolation on the time series data fetched.\n\nThe interpolate_ts() procedure takes a sorted query result as input, collects the data into an array, and processes it by interpolating timestamps where necessary to ensure a continuous time series.\n\nIt checks for the sorted nature of the time series and performs operations to fill in missing timestamps with interpolated prices. 
If the time series is not sorted or if there are duplicate timestamps, it raises exceptions accordingly."},{"cell_type":"code","execution_count":null,"id":"b9915b5c-95d9-4f41-b966-8bd20fdd5573","metadata":{"language":"python","trusted":true},"outputs":[],"source":"DELIMITER //\nCREATE OR REPLACE PROCEDURE driver() AS\nDECLARE\n q query(ts datetime(6), symbol varchar(5), price numeric(18,4));\nBEGIN\n q = SELECT ts, symbol, price FROM tick ORDER BY ts;\n\nECHO SELECT 'Input time series' AS message;\n ECHO SELECT * FROM q ORDER BY ts;\n ECHO SELECT 'Interpolated time series' AS message;\n CALL interpolate_ts(q);\nEND //\nDELIMITER ;\n\n\nDELIMITER //\nCREATE OR REPLACE PROCEDURE interpolate_ts(\n q query(ts datetime(6), symbol varchar(5), price numeric(18,4)))\n -- Important: q must produce sorted output by ts\nAS\nDECLARE\n c array(record(ts datetime(6), symbol varchar(5), price numeric(18,4)));\n r record(ts datetime(6), symbol varchar(5), price numeric(18,4));\n r_next record(ts datetime(6), symbol varchar(5), price numeric(18,4));\n n int;\n i int;\n _ts datetime(6); _symbol varchar(5); _price numeric(18,4);\n time_diff int;\n delta numeric(18,4);\nBEGIN\n DROP TABLE IF EXISTS tmp;\n CREATE TEMPORARY TABLE tmp LIKE tick;\n c = collect(q);\n n = length(c);\n IF n < 2 THEN\n ECHO SELECT * FROM q ORDER BY ts;\n return;\n END IF;\n\n\n i = 0;\n r = c[i];\n r_next = c[i + 1];\n\n\n WHILE (i < n) LOOP\n -- IF at last row THEN output it and exit\n IF i = n - 1 THEN\n _ts = r.ts; _symbol = r.symbol; _price = r.price;\n INSERT INTO tmp VALUES(_ts, _symbol, _price);\n i += 1;\n CONTINUE;\n END IF;\n\n\n time_diff = unix_timestamp(r_next.ts) - unix_timestamp(r.ts);\n\n\n IF time_diff <= 0 THEN\n RAISE user_exception(\"time series not sorted or has duplicate timestamps\");\n END IF;\n\n\n -- output r\n _ts = r.ts; _symbol = r.symbol; _price = r.price;\n INSERT INTO tmp VALUES(_ts, _symbol, _price);\n\n\n IF time_diff = 1 THEN\n r = r_next; -- advance to next row\n ELSIF 
time_diff > 1 THEN\n -- output time_diff-1 rows by extending current row and interpolating price\n delta = (r_next.price - r.price) / time_diff;\n FOR j in 1..time_diff-1 LOOP\n _ts += 1; _price += delta;\n INSERT INTO tmp VALUES(_ts, _symbol, _price);\n END LOOP;\n r = r_next; -- advance to next row\n ELSE\n RAISE user_exception(\"time series not sorted\");\n END IF;\n\n\n i += 1;\n IF i < n - 1 THEN r_next = c[i + 1]; END IF;\n END LOOP;\n ECHO SELECT * FROM tmp ORDER BY ts;\n DROP TABLE tmp;\nEND //\nDELIMITER ;\n"},{"cell_type":"code","execution_count":null,"id":"bdec35a1-445b-4726-9c31-361e625f5bd4","metadata":{"language":"sql","trusted":true},"outputs":[],"source":"%%sql\ncall driver();\n\nselect * from tick;"},{"cell_type":"code","execution_count":null,"id":"1a11d8e8-eaf9-4364-9e4f-a497d87d2b4a","metadata":{"language":"python","trusted":true},"outputs":[],"source":"You can observe the interpolated value for 38 , 39 and 41st second in the tick table abo"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.4"},"singlestore_cell_default_language":"python","singlestore_connection":{"connectionID":"2474c525-3ec7-4571-8007-8a6717864e8d","defaultDatabase":"time_series"}},"nbformat":4,"nbformat_minor":5}