From 7729e1a0577b5d64df1f87fb87281b962f267558 Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Mon, 30 Sep 2024 17:37:37 +0200
Subject: [PATCH 1/7] Update MainDistributionPipeline.yml

---
 .github/workflows/MainDistributionPipeline.yml | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index 683f245..4302228 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -3,8 +3,7 @@
 #
 name: Main Extension Distribution Pipeline
 on:
-  release:
-    types: [published]
+  push:
   pull_request:
   workflow_dispatch:
 
@@ -15,18 +14,17 @@ concurrency:
 jobs:
   duckdb-stable-build:
     name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.0.0
     with:
-      duckdb_version: main
-      extension_name: chsql
+      duckdb_version: v1.1.1
+      extension_name: dynamic_sql_examples
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
     needs: duckdb-stable-build
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main
+    uses: ./.github/workflows/_extension_deploy.yml
     secrets: inherit
     with:
-      duckdb_version: main
-      extension_name: chsql
+      duckdb_version: v1.1.1
+      extension_name: dynamic_sql_examples
       deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
-      deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}

From 2a5d798f67667f86a26d8ec9f5674fdf91049790 Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Mon, 30 Sep 2024 17:38:03 +0200
Subject: [PATCH 2/7] Update _extension_deploy.yml

From 9e2554051612840ab0874b1031d7999d52448d8a Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Mon, 30 Sep 2024 18:17:29 +0200
Subject: [PATCH 3/7] Update Pipeline

---
 .github/workflows/MainDistributionPipeline.yml | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index 4302228..54d7118 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -12,19 +12,29 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  duckdb-next-build:
+    name: Build extension binaries (next)
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
+    with:
+      duckdb_version: main
+      ci_tools_version: main
+      extension_name: chsql
+
   duckdb-stable-build:
     name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.0.0
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.1
    with:
       duckdb_version: v1.1.1
-      extension_name: dynamic_sql_examples
+      ci_tools_version: v1.1.1
+      extension_name: chsql
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
     needs: duckdb-stable-build
+    if: false
     uses: ./.github/workflows/_extension_deploy.yml
     secrets: inherit
     with:
       duckdb_version: v1.1.1
-      extension_name: dynamic_sql_examples
+      extension_name: chsql
       deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
From f8c9234d47a8bb98660bfebb3aa8cf862a09e112 Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Tue, 1 Oct 2024 15:14:12 +0200
Subject: [PATCH 4/7] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 81832a9..5147ed4 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,9 @@ LOAD chsql;
 ```
 
 ## Usage Examples
-Once installed, the macro functions provided by the extension can be used just like built-in functions.
+Once installed, the [macro functions](https://community-extensions.duckdb.org/extensions/chsql.html#added-functions) provided by the extension can be used just like built-in functions.
+
+Here's one example out of the hundreds available, using the `IPv4StringToNum` and `IPv4NumToString` functions:
 
 ```sql
 D INSTALL chsql FROM community;
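For quick reference, the two macros named above are symmetric; a minimal sketch of the conversion they perform (the numeric value is simply the 32-bit big-endian encoding of the dotted quad, e.g. 127 * 2^24 + 1 = 2130706433):

    SELECT IPv4StringToNum('127.0.0.1');  -- 2130706433
    SELECT IPv4NumToString(2130706433);   -- 127.0.0.1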
From 751d1a47fcc1a4ece529d511dd8b09d169c7775a Mon Sep 17 00:00:00 2001
From: akvlad <2798590+akvlad@users.noreply.github.com>
Date: Fri, 11 Oct 2024 19:34:47 +0300
Subject: [PATCH 5/7] Feature: read_parquet_mergetree (#13)

* parquet ordered scan / mergetree like feature

Co-authored-by: Lorenzo Mangani
---
 CMakeLists.txt                         |  23 +-
 Makefile                               |   2 +-
 chsql/CMakeLists.txt                   |  34 +++
 chsql/extension_config.cmake           |  22 ++
 {src => chsql/src}/chsql_extension.cpp |   3 +-
 .../src}/include/chsql_extension.hpp   |   3 +-
 chsql/src/parquet_ordered_scan.cpp     | 286 ++++++++++++++++++
 chsql/src/silly_btree_store.cpp        |  85 ++++++
 {test => chsql/test}/README.md         |   0
 {test => chsql/test}/sql/chsql.test    |  19 ++
 vcpkg.json => chsql/vcpkg.json         |   0
 extension_config.cmake                 |  10 -
 12 files changed, 454 insertions(+), 33 deletions(-)
 create mode 100644 chsql/CMakeLists.txt
 create mode 100644 chsql/extension_config.cmake
 rename {src => chsql/src}/chsql_extension.cpp (99%)
 rename {src => chsql/src}/include/chsql_extension.hpp (67%)
 create mode 100644 chsql/src/parquet_ordered_scan.cpp
 create mode 100644 chsql/src/silly_btree_store.cpp
 rename {test => chsql/test}/README.md (100%)
 rename {test => chsql/test}/sql/chsql.test (88%)
 rename vcpkg.json => chsql/vcpkg.json (100%)
 delete mode 100644 extension_config.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc06a4c..f750372 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,30 +1,13 @@
 cmake_minimum_required(VERSION 3.5)
-
 # Set extension name here
 set(TARGET_NAME chsql)
-
 # DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then
 # used in cmake with find_package. Feel free to remove or replace with other dependencies.
 # Note that it should also be removed from vcpkg.json to prevent needlessly installing it..
 find_package(OpenSSL REQUIRED)
-
 set(EXTENSION_NAME ${TARGET_NAME}_extension)
 set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
-
 project(${TARGET_NAME})
-include_directories(src/include)
-
-set(EXTENSION_SOURCES src/chsql_extension.cpp)
-
-build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
-build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
-
-# Link OpenSSL in both the static library as the loadable extension
-target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)
-target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)
-
-install(
-  TARGETS ${EXTENSION_NAME}
-  EXPORT "${DUCKDB_EXPORT_SET}"
-  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
-  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
+set(EXT_NAME chsql)
+set(DUCKDB_EXTENSION_CONFIGS ../chsql/extension_config.cmake)
+add_subdirectory(./duckdb)

diff --git a/Makefile b/Makefile
index d1b6c95..acd730b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))chsql/
 
 # Configuration of extension
 EXT_NAME=chsql

diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt
new file mode 100644
index 0000000..06de4a9
--- /dev/null
+++ b/chsql/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.5)
+# Set extension name here
+set(TARGET_NAME chsql)
+# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then
+# used in cmake with find_package. Feel free to remove or replace with other dependencies.
+# Note that it should also be removed from vcpkg.json to prevent needlessly installing it..
+find_package(OpenSSL REQUIRED)
+set(EXTENSION_NAME ${TARGET_NAME}_extension)
+set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
+project(${TARGET_NAME})
+
+include_directories(
+    ./src/include
+    ./src
+    ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include
+    ../duckdb/third_party/lz4
+    ../duckdb/third_party/parquet
+    ../duckdb/third_party/thrift
+    ../duckdb/third_party/snappy
+    ../duckdb/third_party/zstd/include
+    ../duckdb/third_party/mbedtls
+    ../duckdb/third_party/mbedtls/include
+    ../duckdb/third_party/brotli/include)
+set(EXTENSION_SOURCES src/chsql_extension.cpp)
+build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
+build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
+# Link OpenSSL in both the static library as the loadable extension
+target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)
+target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)
+install(
+  TARGETS ${EXTENSION_NAME}
+  EXPORT "${DUCKDB_EXPORT_SET}"
+  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
+  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
diff --git a/chsql/extension_config.cmake b/chsql/extension_config.cmake
new file mode 100644
index 0000000..776f13b
--- /dev/null
+++ b/chsql/extension_config.cmake
@@ -0,0 +1,22 @@
+# This file is included by DuckDB's build system. It specifies which extension to load
+
+include_directories(
+    ./src/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include
+    ../duckdb/third_party/lz4
+    ../duckdb/third_party/parquet
+    ../duckdb/third_party/thrift
+    ../duckdb/third_party/snappy
+    ../duckdb/third_party/zstd/include
+    ../duckdb/third_party/mbedtls
+    ../duckdb/third_party/mbedtls/include
+    ../duckdb/third_party/brotli/include)
+
+# Extension from this repo
+duckdb_extension_load(chsql
+    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
+    LOAD_TESTS
+)
+
+# Any extra extensions that should be built
+# e.g.: duckdb_extension_load(json)

diff --git a/src/chsql_extension.cpp b/chsql/src/chsql_extension.cpp
similarity index 99%
rename from src/chsql_extension.cpp
rename to chsql/src/chsql_extension.cpp
index 6756dea..5d55613 100644
--- a/src/chsql_extension.cpp
+++ b/chsql/src/chsql_extension.cpp
@@ -12,7 +12,7 @@
 
 // OpenSSL linked through vcpkg
 #include <openssl/opensslv.h>
-
+#include "parquet_ordered_scan.cpp"
 namespace duckdb {
 
 // To add a new scalar SQL macro, add a new macro to this array!
@@ -188,6 +188,7 @@ static void LoadInternal(DatabaseInstance &instance) {
 		auto table_info = DefaultTableFunctionGenerator::CreateTableMacroInfo(chsql_table_macros[index]);
 		ExtensionUtil::RegisterFunction(instance, *table_info);
 	}
+	ExtensionUtil::RegisterFunction(instance, ReadParquetOrderedFunction());
 }
 
 void ChsqlExtension::Load(DuckDB &db) {

diff --git a/src/include/chsql_extension.hpp b/chsql/src/include/chsql_extension.hpp
similarity index 67%
rename from src/include/chsql_extension.hpp
rename to chsql/src/include/chsql_extension.hpp
index 57abf17..33e2c6f 100644
--- a/src/include/chsql_extension.hpp
+++ b/chsql/src/include/chsql_extension.hpp
@@ -10,5 +10,6 @@ class ChsqlExtension : public Extension {
 	std::string Name() override;
 	std::string Version() const override;
 };
-
+duckdb::TableFunction ReadParquetOrderedFunction();
+static void RegisterSillyBTreeStore(DatabaseInstance &instance);
 } // namespace duckdb
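With the registration call added to LoadInternal above, the new table function becomes visible in DuckDB's catalog. A quick sanity check from SQL (a sketch, assuming a build with the extension loaded):

    SELECT function_name, function_type
    FROM duckdb_functions()
    WHERE function_name = 'read_parquet_mergetree';
    -- expect one row with function_type = 'table'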
diff --git a/chsql/src/parquet_ordered_scan.cpp b/chsql/src/parquet_ordered_scan.cpp
new file mode 100644
index 0000000..be3c617
--- /dev/null
+++ b/chsql/src/parquet_ordered_scan.cpp
@@ -0,0 +1,286 @@
+#include
+#include "duckdb/common/exception.hpp"
+#include
+#include "chsql_extension.hpp"
+#include
+
+namespace duckdb {
+
+    struct ReaderSet {
+        unique_ptr<ParquetReader> reader;
+        idx_t orderByIdx;
+        unique_ptr<DataChunk> chunk;
+        unique_ptr<ParquetReaderScanState> scanState;
+        vector<idx_t> columnMap;
+        idx_t result_idx;
+    };
+
+    struct OrderedReadFunctionData : FunctionData {
+        string orderBy;
+        vector<unique_ptr<ReaderSet>> sets;
+        vector<string> files;
+        vector<LogicalType> returnTypes;
+        vector<string> names;
+        unique_ptr<FunctionData> Copy() const override {
+            throw std::runtime_error("not implemented");
+        }
+        static bool EqualStrArrays(const vector<string> &a, const vector<string> &b) {
+            if (a.size() != b.size()) {
+                return false;
+            }
+            for (idx_t i = 0; i < a.size(); i++) {
+                if (a[i] != b[i]) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        bool Equals(const FunctionData &other) const override {
+            const auto &o = other.Cast<OrderedReadFunctionData>();
+            if (!EqualStrArrays(o.files, files)) {
+                return false;
+            }
+            return this->orderBy == o.orderBy;
+        };
+    };
+
+    struct OrderedReadLocalState : LocalTableFunctionState {
+        vector<unique_ptr<ReaderSet>> sets;
+        vector<idx_t> winner_group;
+        // The "winner group" holds the reader with the smallest current value plus
+        // every reader whose current value still falls inside the winner's chunk range.
+        void RecalculateWinnerGroup() {
+            winner_group.clear();
+            if (sets.empty()) {
+                return;
+            }
+            idx_t winner_idx = 0;
+            for (idx_t i = 1; i < sets.size(); i++) {
+                const auto &s = sets[i];
+                const auto &w = sets[winner_idx];
+                if (s->chunk->GetValue(s->orderByIdx, s->result_idx) <
+                    w->chunk->GetValue(w->orderByIdx, w->result_idx)) {
+                    winner_idx = i;
+                }
+            }
+            winner_group.push_back(winner_idx);
+            auto &w = sets[winner_idx];
+            const auto &wLast = w->chunk->GetValue(w->orderByIdx, w->chunk->size() - 1);
+            for (idx_t i = 0; i < sets.size(); i++) {
+                if (i == winner_idx) continue;
+                auto &s = sets[i];
+                const auto &sFirst = s->chunk->GetValue(s->orderByIdx, s->result_idx);
+                if (sFirst <= wLast) {
+                    winner_group.push_back(i);
+                }
+            }
+        }
+        void RemoveSetGracefully(const idx_t idx) {
+            if (idx != sets.size() - 1) {
+                sets[idx].reset(sets[sets.size() - 1].release());
+            }
+            sets.pop_back();
+        }
+    };
+
+    static unique_ptr<FunctionData> OrderedParquetScanBind(ClientContext &context, TableFunctionBindInput &input,
+                                                           vector<LogicalType> &return_types, vector<string> &names) {
+        Connection conn(*context.db);
+        auto res = make_uniq<OrderedReadFunctionData>();
+        auto files = ListValue::GetChildren(input.inputs[0]);
+        vector<string> fileNames;
+        for (auto &file : files) {
+            fileNames.push_back(file.ToString());
+        }
+        GlobMultiFileList fileList(context, fileNames, FileGlobOptions::ALLOW_EMPTY);
+        string filename;
+        MultiFileListScanData it;
+        fileList.InitializeScan(it);
+        vector<string> unglobbedFileList;
+        while (fileList.Scan(it, filename)) {
+            unglobbedFileList.push_back(filename);
+        }
+
+        res->orderBy = input.inputs[1].GetValue<string>();
+        for (auto &file : unglobbedFileList) {
+            auto set = make_uniq<ReaderSet>();
+            res->files.push_back(file);
+            ParquetOptions po;
+            po.binary_as_string = true;
+            ParquetReader reader(context, file, po, nullptr);
+            set->columnMap = vector<idx_t>();
+            for (auto &el : reader.metadata->metadata->schema) {
+                if (el.num_children != 0) {
+                    continue;
+                }
+                auto name_it = std::find(names.begin(), names.end(), el.name);
+                auto return_type = LogicalType::ANY;
+                switch (el.type) {
+                case Type::INT32:
+                    return_type = LogicalType::INTEGER;
+                    break;
+                case Type::INT64:
+                    return_type = LogicalType::BIGINT;
+                    break;
+                case Type::DOUBLE:
+                    return_type = LogicalType::DOUBLE;
+                    break;
+                case Type::FLOAT:
+                    return_type = LogicalType::FLOAT;
+                    break;
+                case Type::BYTE_ARRAY:
+                    return_type = LogicalType::VARCHAR;
+                    break;
+                case Type::FIXED_LEN_BYTE_ARRAY:
+                    return_type = LogicalType::VARCHAR;
+                    break;
+                case Type::BOOLEAN:
+                    return_type = LogicalType::TINYINT;
+                    break;
+                default:
+                    break;
+                }
+                set->columnMap.push_back(name_it - names.begin());
+                if (el.name == res->orderBy) {
+                    set->orderByIdx = name_it - names.begin();
+                }
+                if (name_it != names.end()) {
+                    if (return_types[name_it - names.begin()] != return_type) {
+                        throw std::runtime_error("incompatible schema");
+                    }
+                    continue;
+                }
+                return_types.push_back(return_type);
+                names.push_back(el.name);
+            }
+            res->sets.push_back(std::move(set));
+        }
+        res->returnTypes = return_types;
+        res->names = names;
+        return std::move(res);
+    }
+
+    static unique_ptr<LocalTableFunctionState>
+    ParquetScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *gstate_p) {
+        auto res = make_uniq<OrderedReadLocalState>();
+        const auto &bindData = input.bind_data->Cast<OrderedReadFunctionData>();
+        ParquetOptions po;
+        po.binary_as_string = true;
+        for (idx_t i = 0; i < bindData.files.size(); i++) {
+            auto set = make_uniq<ReaderSet>();
+            set->reader = make_uniq<ParquetReader>(context.client, bindData.files[i], po, nullptr);
+            set->scanState = make_uniq<ParquetReaderScanState>();
+            int j = 0;
+            for (auto &el : set->reader->metadata->metadata->schema) {
+                if (el.num_children != 0) {
+                    continue;
+                }
+                set->reader->reader_data.column_ids.push_back(j);
+                j++;
+            }
+            set->columnMap = bindData.sets[i]->columnMap;
+            set->reader->reader_data.column_mapping = set->columnMap;
+            // Scan every row group of the file, in file order.
+            vector<idx_t> rgs(set->reader->metadata->metadata->row_groups.size(), 0);
+            for (idx_t rg = 0; rg < rgs.size(); rg++) {
+                rgs[rg] = rg;
+            }
+            set->reader->InitializeScan(context.client, *set->scanState, rgs);
+            set->chunk = make_uniq<DataChunk>();
+
+            set->orderByIdx = bindData.sets[i]->orderByIdx;
+            set->result_idx = 0;
+            auto ltypes = vector<LogicalType>();
+            for (const auto idx : set->columnMap) {
+                ltypes.push_back(bindData.returnTypes[idx]);
+            }
+            set->chunk->Initialize(context.client, ltypes);
+            set->reader->Scan(*set->scanState, *set->chunk);
+            res->sets.push_back(std::move(set));
+        }
+        res->RecalculateWinnerGroup();
+        return std::move(res);
+    }
+
+    static void ParquetOrderedScanImplementation(
+        ClientContext &context, duckdb::TableFunctionInput &data_p, DataChunk &output) {
+        auto &loc_state = data_p.local_state->Cast<OrderedReadLocalState>();
+        const auto &fieldNames = data_p.bind_data->Cast<OrderedReadFunctionData>().names;
+        const auto &returnTypes = data_p.bind_data->Cast<OrderedReadFunctionData>().returnTypes;
+        bool toRecalc = false;
+        // Refill exhausted chunks and drop readers that have reached end of file.
+        for (int i = loc_state.sets.size() - 1; i >= 0; i--) {
+            if (loc_state.sets[i]->result_idx >= loc_state.sets[i]->chunk->size()) {
+                auto &set = loc_state.sets[i];
+                set->chunk->Reset();
+                loc_state.sets[i]->reader->Scan(
+                    *loc_state.sets[i]->scanState,
+                    *loc_state.sets[i]->chunk);
+                loc_state.sets[i]->result_idx = 0;
+
+                if (loc_state.sets[i]->chunk->size() == 0) {
+                    loc_state.RemoveSetGracefully(i);
+                }
+                toRecalc = true;
+            }
+        }
+        if (loc_state.sets.empty()) {
+            return;
+        }
+        if (toRecalc) {
+            loc_state.RecalculateWinnerGroup();
+        }
+        int cap = 1024;
+        output.Reset();
+        output.SetCapacity(cap);
+        idx_t j = 0;
+        // Fast path: only one reader can contribute, so append its remaining rows wholesale.
+        if (loc_state.winner_group.size() == 1) {
+            auto &set = loc_state.sets[loc_state.winner_group[0]];
+            set->chunk->Slice(set->result_idx, set->chunk->size() - set->result_idx);
+            output.Append(*set->chunk, true);
+            output.SetCardinality(set->chunk->size());
+            set->result_idx = set->chunk->size();
+            return;
+        }
+        while (true) {
+            auto winnerSet = &loc_state.sets[loc_state.winner_group[0]];
+            Value winner_val = (*winnerSet)->chunk->GetValue(
+                (*winnerSet)->orderByIdx,
+                (*winnerSet)->result_idx);
+            for (idx_t k = 1; k < loc_state.winner_group.size(); k++) {
+                const auto i = loc_state.winner_group[k];
+                const auto &set = loc_state.sets[i];
+                const Value &val = set->chunk->GetValue(set->orderByIdx, set->result_idx);
+                if (val < winner_val) {
+                    winnerSet = &loc_state.sets[i];
+                    winner_val = (*winnerSet)->chunk->GetValue(set->orderByIdx, set->result_idx);
+                }
+            }
+            for (idx_t i = 0; i < fieldNames.size(); i++) {
+                const auto &val = (*winnerSet)->chunk->GetValue(i, (*winnerSet)->result_idx);
+                output.SetValue(i, j, val);
+            }
+            j++;
+            (*winnerSet)->result_idx++;
+            if ((*winnerSet)->result_idx >= (*winnerSet)->chunk->size() || j >= 2048) {
+                output.SetCardinality(j);
+                return;
+            }
+            if (j >= cap) {
+                cap *= 2;
+                output.SetCapacity(cap);
+            }
+        }
+    }
+
+    TableFunction ReadParquetOrderedFunction() {
+        TableFunction tf = duckdb::TableFunction(
+            "read_parquet_mergetree",
+            {LogicalType::LIST(LogicalType::VARCHAR), LogicalType::VARCHAR},
+            ParquetOrderedScanImplementation,
+            OrderedParquetScanBind,
+            nullptr,
+            ParquetScanInitLocal);
+        return tf;
+    }
+}
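The file above implements a k-way merge over pre-sorted Parquet files: each file is scanned sequentially, and every output row is taken from the reader whose current value in the order-by column is smallest, so the result is globally ordered only if each input file is already sorted on that column. A usage sketch (the file names and the 'ts' column are placeholders):

    SELECT *
    FROM read_parquet_mergetree(ARRAY['part1.parquet', 'part2.parquet'], 'ts');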
diff --git a/chsql/src/silly_btree_store.cpp b/chsql/src/silly_btree_store.cpp
new file mode 100644
index 0000000..3def1f3
--- /dev/null
+++ b/chsql/src/silly_btree_store.cpp
@@ -0,0 +1,85 @@
+#include
+#include
+#include
+#include
+
+namespace duckdb {
+    struct SillyTreeKey {
+        //TODO: key-value pair using duckdb::Value
+    };
+    struct SillyTreeValue {
+        //TODO: key-value pair using duckdb::Value
+    };
+    static std::unordered_map<idx_t, std::map<Value, Value>> silly_btree_store;
+    static std::unordered_map<idx_t, unique_ptr<std::mutex>> silly_btree_store_mutex;
+    static std::mutex silly_btree_store_mutex_lock;
+
+    // RAII guard: locks the mutex on construction, unlocks on destruction.
+    struct _lock {
+        std::mutex &_m;
+        _lock(std::mutex &m) : _m(m) {
+            _m.lock();
+        }
+        ~_lock() {
+            _m.unlock();
+        }
+    };
+
+    struct AppendTreeData : FunctionData {
+        unique_ptr<FunctionData> Copy() const override {
+            throw std::runtime_error("not implemented");
+        }
+
+        bool Equals(const FunctionData &other) const override {
+            return false;
+        }
+
+        idx_t tree_id;
+        Value key;
+        Value value;
+    };
+
+    static unique_ptr<FunctionData> AppendTreeBind(ClientContext &context, TableFunctionBindInput &input,
+                                                   vector<LogicalType> &return_types, vector<string> &names) {
+        auto res = make_uniq<AppendTreeData>();
+        res->tree_id = input.inputs[0].GetValue<idx_t>();
+        res->key = input.inputs[1];
+        res->value = input.inputs[2];
+        return_types.clear();
+        return_types.push_back(LogicalType::TINYINT);
+        names.clear();
+        names.push_back("ok");
+        return res;
+    }
+
+    static unique_ptr<LocalTableFunctionState> AppendTreeLocalState(
+        ClientContext &context, TableFunctionBindInput &input,
+        vector<LogicalType> &return_types, vector<string> &names) {
+        return nullptr;
+    }
+
+    struct SillyTree {
+        std::map<Value, Value> &values;
+        std::unique_ptr<_lock> lock;
+        SillyTree(std::map<Value, Value> &values, std::unique_ptr<_lock> lock)
+            : values(values), lock(std::move(lock)) {}
+    };
+
+    static unique_ptr<SillyTree> GetTree(idx_t tree_id) {
+        auto l = _lock(silly_btree_store_mutex_lock);
+        auto it = silly_btree_store.find(tree_id);
+        if (it == silly_btree_store.end()) {
+            silly_btree_store[tree_id] = std::map<Value, Value>();
+            silly_btree_store_mutex[tree_id] = make_uniq<std::mutex>();
+        }
+        // Hand out the tree together with its per-tree lock held.
+        return make_uniq<SillyTree>(silly_btree_store[tree_id],
+                                    make_uniq<_lock>(*silly_btree_store_mutex[tree_id]));
+    }
+
+    static void AppendTreeImplementation(ClientContext &context, duckdb::TableFunctionInput &data_p, DataChunk &output) {
+        //TODO: append the bound key/value pair to the tree
+    }
+
+    static void RegisterSillyBTreeStore(DatabaseInstance &instance) {
+        //TODO: register store functions
+    }
+}
\ No newline at end of file
diff --git a/test/README.md b/chsql/test/README.md
similarity index 100%
rename from test/README.md
rename to chsql/test/README.md
diff --git a/test/sql/chsql.test b/chsql/test/sql/chsql.test
similarity index 88%
rename from test/sql/chsql.test
rename to chsql/test/sql/chsql.test
index d753ece..84c2527 100644
--- a/test/sql/chsql.test
+++ b/chsql/test/sql/chsql.test
@@ -11,6 +11,8 @@ Catalog Error: Scalar Function with name chsql does not exist!
 # Require statement will ensure this test is run with this extension loaded
 require chsql
 
+require parquet
+
 # Confirm the extension works
 query I
 SELECT chsql('Works');
@@ -386,3 +388,20 @@ query I
 SELECT bitCount(7) -- Binary: 111
 ----
 3
+
+# read_mergetree
+statement ok
+copy (select number * 2 as n from numbers(100000)) TO '__TEST_DIR__/1.parquet';
+
+statement ok
+copy (select number * 2 + 1 as n from numbers(100000)) TO '__TEST_DIR__/2.parquet';
+
+query I
+select count() as c from read_parquet_mergetree(ARRAY['__TEST_DIR__/1.parquet', '__TEST_DIR__/2.parquet'], 'n');
+----
+200000
+
+query I
+select count() as c from (select n - lag(n) over () as diff from read_parquet_mergetree(ARRAY['__TEST_DIR__/1.parquet', '__TEST_DIR__/2.parquet'], 'n')) where diff < 0;
+----
+0
diff --git a/vcpkg.json b/chsql/vcpkg.json
similarity index 100%
rename from vcpkg.json
rename to chsql/vcpkg.json
diff --git a/extension_config.cmake b/extension_config.cmake
deleted file mode 100644
index 85d494c..0000000
--- a/extension_config.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-# This file is included by DuckDB's build system. It specifies which extension to load
-
-# Extension from this repo
-duckdb_extension_load(chsql
-    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
-    LOAD_TESTS
-)
-
-# Any extra extensions that should be built
-# e.g.: duckdb_extension_load(json)
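The two new test queries rely on the merge invariant: 1.parquet holds the even numbers and 2.parquet the odd ones, so the merged stream over 'n' should be 0, 1, 2, ..., 199999, and the lag() query counts out-of-order neighbours (expected: none). An equivalent spot check against the same test files (a sketch; __TEST_DIR__ is the sqllogictest working-directory placeholder):

    SELECT n
    FROM read_parquet_mergetree(ARRAY['__TEST_DIR__/1.parquet', '__TEST_DIR__/2.parquet'], 'n')
    LIMIT 4;
    -- expect 0, 1, 2, 3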
From 89e1c2fd68c18e018f5afb7ef81b3454abb46dfc Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Fri, 11 Oct 2024 23:28:24 +0200
Subject: [PATCH 6/7] Handle empty patterns

---
 chsql/src/parquet_ordered_scan.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/chsql/src/parquet_ordered_scan.cpp b/chsql/src/parquet_ordered_scan.cpp
index be3c617..88585c4 100644
--- a/chsql/src/parquet_ordered_scan.cpp
+++ b/chsql/src/parquet_ordered_scan.cpp
@@ -101,6 +101,9 @@ namespace duckdb {
         while (fileList.Scan(it, filename)) {
             unglobbedFileList.push_back(filename);
         }
+        if (unglobbedFileList.empty()) {
+            throw duckdb::InvalidInputException("No files matched the provided pattern.");
+        }
 
         res->orderBy = input.inputs[1].GetValue<string>();
         for (auto &file : unglobbedFileList) {

From a000d4f2a5f9e1c13cddcd75ea2d3351466f1706 Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani
Date: Sun, 13 Oct 2024 15:52:34 +0200
Subject: [PATCH 7/7] Update extract_macros.yml

---
 .github/workflows/extract_macros.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/extract_macros.yml b/.github/workflows/extract_macros.yml
index a2efca9..5012e73 100644
--- a/.github/workflows/extract_macros.yml
+++ b/.github/workflows/extract_macros.yml
@@ -11,7 +11,7 @@ jobs:
       - uses: actions/checkout@v4
       - name: Extract Macros
         run: |
-          grep -oP '{DEFAULT_SCHEMA, "\K[^"]*|, R"\K[^"]*' ./src/chsql_extension.cpp | awk 'NR%2{printf "%s ", $0; next} 1' | grep -v ^"times_" > /tmp/macros.md
+          grep -oP '{DEFAULT_SCHEMA, "\K[^"]*|, R"\K[^"]*' ./chsql/src/chsql_extension.cpp | awk 'NR%2{printf "%s ", $0; next} 1' | grep -v ^"times_" > /tmp/macros.md
       - name: Summary Report
         run: |
           echo "" >> $GITHUB_STEP_SUMMARY
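The macro list that this grep scrapes from the source can also be produced from a live session; a rough SQL equivalent (assuming the extension is loaded, and filtering out the times_* helpers like the workflow does):

    SELECT DISTINCT function_name
    FROM duckdb_functions()
    WHERE function_type IN ('macro', 'table_macro')
      AND function_name NOT LIKE 'times\_%' ESCAPE '\';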