diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a54282c0700fc8578fefc62fd8e52beb6a8bf025..bb51d18a84a9fadd2ee9daee60c2fff3f24ff4a5 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,14 +1,25 @@
-image: python:3.9.13
+image: mambaorg/micromamba
 
-test-evalhyd-python:
-  stage: test
+stages:
+  - build_and_test
+
+build-and-test:
+  stage: build_and_test
   variables:
     GIT_SUBMODULE_STRATEGY: recursive
   script:
+    # install dependencies
+    - micromamba install --yes --file environment.yml
+    - export EVALHYD_PYTHON_VENDOR_XTL=FALSE
+    - export EVALHYD_PYTHON_VENDOR_XTENSOR=FALSE
+    - export EVALHYD_PYTHON_VENDOR_XTENSOR_PYTHON=FALSE
+    # vendor evalhyd (while waiting for evalhyd to be uploaded to conda-forge)
+    - micromamba install --yes -c conda-forge git
+    - export EVALHYD_PYTHON_VENDOR_EVALHYD=TRUE
     # print Python version
     - python --version
     # compile and install package
-    - python -m pip install --user .[tests]
+    - python -m pip install --user .[tests] -v
     # make sure package can be imported and check version
     - python -c "import evalhyd;print(evalhyd.__version__)"
     # run test suite
diff --git a/.gitmodules b/.gitmodules
index 70e58bc3f31f68f1157a5d335d805857a727ab25..92278900adfe1a86a2f711d380964056d67d214c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,12 @@
-[submodule "deps/evalhyd"]
-	path = deps/evalhyd
-	url = https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd.git
+[submodule "deps/xtl"]
+	path = deps/xtl
+	url = https://github.com/xtensor-stack/xtl.git
 [submodule "deps/xtensor-python"]
 	path = deps/xtensor-python
 	url = https://github.com/xtensor-stack/xtensor-python.git
+[submodule "deps/xtensor"]
+	path = deps/xtensor
+	url = https://github.com/xtensor-stack/xtensor.git
+[submodule "deps/evalhyd-cpp"]
+	path = deps/evalhyd-cpp
+	url = https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-cpp.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b37ff90d75ab9eb1cbb43f6f22269a6cbff2abc
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,91 @@
+cmake_minimum_required(VERSION 3.15)
+
+project(
+        EvalHyd-Python
+        LANGUAGES CXX C
+        VERSION 0.1.0.0
+        DESCRIPTION "Python bindings for evalhyd utility"
+)
+
+add_library(
+        evalhyd-python MODULE
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/evalhyd-python.cpp
+)
+
+# ------------------------------------------------------------------------------
+# dependencies and build
+# ------------------------------------------------------------------------------
+
+if(SKBUILD)
+        find_package(PythonExtensions REQUIRED)
+        find_package(NumPy REQUIRED)
+        python_extension_module(evalhyd-python)
+else()
+        find_package(Python COMPONENTS Interpreter Development NumPy REQUIRED)
+        target_link_libraries(evalhyd-python Python::NumPy)
+        # use only header if numpy target links to libpython
+        #target_include_directories(evalhyd-python SYSTEM PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
+endif()
+
+find_package(xtensor 0.7.5 REQUIRED)
+message(STATUS "Found xtl: ${xtl_INCLUDE_DIRS}/xtl")
+
+find_package(xtensor 0.24.6 REQUIRED)
+message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor")
+
+find_package(xtensor-python 0.26.1 REQUIRED)
+message(STATUS "Found xtensor-python: ${xtensor-python_INCLUDE_DIRS}/xtensor-python")
+
+find_package(pybind11 REQUIRED)
+message(STATUS "Found pybind11: ${pybind11_INCLUDE_DIRS}/pybind11")
+
+if(DEFINED EVALHYD_SRC)
+        set(EVALHYD_BUILD_TEST OFF CACHE BOOL "configure and compile tests")
+        add_subdirectory(${EVALHYD_SRC} deps/evalhyd)
+else()
+        find_package(EvalHyd 0.1.0 REQUIRED)
+endif()
+
+target_link_libraries(
+        evalhyd-python
+        EvalHyd::evalhyd
+        xtensor-python
+        pybind11::module
+        pybind11::lto
+        pybind11::windows_extras
+)
+
+set_target_properties(
+        evalhyd-python PROPERTIES
+        OUTPUT_NAME evalhyd
+)
+
+pybind11_extension(evalhyd-python)
+if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
+        # Strip unnecessary sections of the binary on Linux/macOS
+        pybind11_strip(evalhyd-python)
+endif()
+
+set_target_properties(
+        evalhyd-python PROPERTIES
+        CXX_VISIBILITY_PRESET "hidden"
+        CUDA_VISIBILITY_PRESET "hidden"
+)
+
+# add include directories
+target_include_directories(
+        evalhyd-python
+        PUBLIC
+                ${NUMPY_INCLUDE_DIRS}
+)
+
+# ------------------------------------------------------------------------------
+# installation
+# ------------------------------------------------------------------------------
+
+if(SKBUILD)
+        install(
+                TARGETS evalhyd-python
+                DESTINATION evalhyd-python
+        )
+endif()
diff --git a/LICENCE.rst b/LICENCE.rst
new file mode 100644
index 0000000000000000000000000000000000000000..23f589cb52912b4a70adac8992cf11dbb4054caa
--- /dev/null
+++ b/LICENCE.rst
@@ -0,0 +1,619 @@
+GNU General Public License
+
+Version 3, 29 June 2007
+Copyright Â© 2007 Free Software Foundation, Inc. <https://fsf.org/>
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+--------
+
+The GNU General Public License is a free, copyleft license for software and other
+kinds of works.
+
+The licenses for most software and other practical works are designed to take away
+your freedom to share and change the works. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change all versions of a
+program--to make sure it remains free software for all its users. We, the Free
+Software Foundation, use the GNU General Public License for most of our software; it
+applies also to any other work released this way by its authors. You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our General
+Public Licenses are designed to make sure that you have the freedom to distribute
+copies of free software (and charge for them if you wish), that you receive source
+code or can get it if you want it, that you can change the software or use pieces of
+it in new free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you these rights or
+asking you to surrender the rights. Therefore, you have certain responsibilities if
+you distribute copies of the software, or if you modify it: responsibilities to
+respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether gratis or for a fee,
+you must pass on to the recipients the same freedoms that you received. You must make
+sure that they, too, receive or can get the source code. And you must show them these
+terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps: (1) assert
+copyright on the software, and (2) offer you this License giving you legal permission
+to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains that there is
+no warranty for this free software. For both users' and authors' sake, the GPL
+requires that modified versions be marked as changed, so that their problems will not
+be attributed erroneously to authors of previous versions.
+
+Some devices are designed to deny users access to install or run modified versions of
+the software inside them, although the manufacturer can do so. This is fundamentally
+incompatible with the aim of protecting users' freedom to change the software. The
+systematic pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we have designed
+this version of the GPL to prohibit the practice for those products. If such problems
+arise substantially in other domains, we stand ready to extend this provision to
+those domains in future versions of the GPL, as needed to protect the freedom of
+users.
+
+Finally, every program is threatened constantly by software patents. States should
+not allow patents to restrict development and use of software on general-purpose
+computers, but in those that do, we wish to avoid the special danger that patents
+applied to a free program could make it effectively proprietary. To prevent this, the
+GPL assures that patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+Terms and Conditions
+--------------------
+
+0. Definitions
+^^^^^^^^^^^^^^
+
+â€œThis Licenseâ€ refers to version 3 of the GNU General Public License.
+
+â€œCopyrightâ€ also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+â€œThe Programâ€ refers to any copyrightable work licensed under this
+License. Each licensee is addressed as â€œyouâ€. â€œLicenseesâ€ and
+â€œrecipientsâ€ may be individuals or organizations.
+
+To â€œmodifyâ€ a work means to copy from or adapt all or part of the work in
+a fashion requiring copyright permission, other than the making of an exact copy. The
+resulting work is called a â€œmodified versionâ€ of the earlier work or a
+work â€œbased onâ€ the earlier work.
+
+A â€œcovered workâ€ means either the unmodified Program or a work based on
+the Program.
+
+To â€œpropagateâ€ a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for infringement under
+applicable copyright law, except executing it on a computer or modifying a private
+copy. Propagation includes copying, distribution (with or without modification),
+making available to the public, and in some countries other activities as well.
+
+To â€œconveyâ€ a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through a computer
+network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays â€œAppropriate Legal Noticesâ€ to the
+extent that it includes a convenient and prominently visible feature that (1)
+displays an appropriate copyright notice, and (2) tells the user that there is no
+warranty for the work (except to the extent that warranties are provided), that
+licensees may convey the work under this License, and how to view a copy of this
+License. If the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+1. Source Code
+^^^^^^^^^^^^^^
+
+The â€œsource codeâ€ for a work means the preferred form of the work for
+making modifications to it. â€œObject codeâ€ means any non-source form of a
+work.
+
+A â€œStandard Interfaceâ€ means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of interfaces
+specified for a particular programming language, one that is widely used among
+developers working in that language.
+
+The â€œSystem Librariesâ€ of an executable work include anything, other than
+the work as a whole, that (a) is included in the normal form of packaging a Major
+Component, but which is not part of that Major Component, and (b) serves only to
+enable use of the work with that Major Component, or to implement a Standard
+Interface for which an implementation is available to the public in source code form.
+A â€œMajor Componentâ€, in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system (if any) on which
+the executable work runs, or a compiler used to produce the work, or an object code
+interpreter used to run it.
+
+The â€œCorresponding Sourceâ€ for a work in object code form means all the
+source code needed to generate, install, and (for an executable work) run the object
+code and to modify the work, including scripts to control those activities. However,
+it does not include the work's System Libraries, or general-purpose tools or
+generally available free programs which are used unmodified in performing those
+activities but which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for the work, and
+the source code for shared libraries and dynamically linked subprograms that the work
+is specifically designed to require, such as by intimate data communication or
+control flow between those subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate
+automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+2. Basic Permissions
+^^^^^^^^^^^^^^^^^^^^
+
+All rights granted under this License are granted for the term of copyright on the
+Program, and are irrevocable provided the stated conditions are met. This License
+explicitly affirms your unlimited permission to run the unmodified Program. The
+output from running a covered work is covered by this License only if the output,
+given its content, constitutes a covered work. This License acknowledges your rights
+of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey, without
+conditions so long as your license otherwise remains in force. You may convey covered
+works to others for the sole purpose of having them make modifications exclusively
+for you, or provide you with facilities for running those works, provided that you
+comply with the terms of this License in conveying all material for which you do not
+control copyright. Those thus making or running the covered works for you must do so
+exclusively on your behalf, under your direction and control, on terms that prohibit
+them from making any copies of your copyrighted material outside their relationship
+with you.
+
+Conveying under any other circumstances is permitted solely under the conditions
+stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+No covered work shall be deemed part of an effective technological measure under any
+applicable law fulfilling obligations under article 11 of the WIPO copyright treaty
+adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention
+of such measures.
+
+When you convey a covered work, you waive any legal power to forbid circumvention of
+technological measures to the extent such circumvention is effected by exercising
+rights under this License with respect to the covered work, and you disclaim any
+intention to limit operation or modification of the work as a means of enforcing,
+against the work's users, your or third parties' legal rights to forbid circumvention
+of technological measures.
+
+4. Conveying Verbatim Copies
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may convey verbatim copies of the Program's source code as you receive it, in any
+medium, provided that you conspicuously and appropriately publish on each copy an
+appropriate copyright notice; keep intact all notices stating that this License and
+any non-permissive terms added in accord with section 7 apply to the code; keep
+intact all notices of the absence of any warranty; and give all recipients a copy of
+this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may offer
+support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may convey a work based on the Program, or the modifications to produce it from
+the Program, in the form of source code under the terms of section 4, provided that
+you also meet all of these conditions:
+
+   a) The work must carry prominent notices stating that you modified it, and giving a
+      relevant date.
+   b) The work must carry prominent notices stating that it is released under this
+      License and any conditions added under section 7. This requirement modifies the
+      requirement in section 4 to â€œkeep intact all noticesâ€.
+   c) You must license the entire work, as a whole, under this License to anyone who
+      comes into possession of a copy. This License will therefore apply, along with any
+      applicable section 7 additional terms, to the whole of the work, and all its parts,
+      regardless of how they are packaged. This License gives no permission to license the
+      work in any other way, but it does not invalidate such permission if you have
+      separately received it.
+   d) If the work has interactive user interfaces, each must display Appropriate Legal
+      Notices; however, if the Program has interactive interfaces that do not display
+      Appropriate Legal Notices, your work need not make them do so.
+
+A compilation of a covered work with other separate and independent works, which are
+not by their nature extensions of the covered work, and which are not combined with
+it such as to form a larger program, in or on a volume of a storage or distribution
+medium, is called an â€œaggregateâ€ if the compilation and its resulting
+copyright are not used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work in an aggregate
+does not cause this License to apply to the other parts of the aggregate.
+
+6. Conveying Non-Source Forms
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may convey a covered work in object code form under the terms of sections 4 and
+5, provided that you also convey the machine-readable Corresponding Source under the
+terms of this License, in one of these ways:
+
+   a) Convey the object code in, or embodied in, a physical product (including a
+      physical distribution medium), accompanied by the Corresponding Source fixed on a
+      durable physical medium customarily used for software interchange.
+   b) Convey the object code in, or embodied in, a physical product (including a
+      physical distribution medium), accompanied by a written offer, valid for at least
+      three years and valid for as long as you offer spare parts or customer support for
+      that product model, to give anyone who possesses the object code either (1) a copy of
+      the Corresponding Source for all the software in the product that is covered by this
+      License, on a durable physical medium customarily used for software interchange, for
+      a price no more than your reasonable cost of physically performing this conveying of
+      source, or (2) access to copy the Corresponding Source from a network server at no
+      charge.
+   c) Convey individual copies of the object code with a copy of the written offer to
+      provide the Corresponding Source. This alternative is allowed only occasionally and
+      noncommercially, and only if you received the object code with such an offer, in
+      accord with subsection 6b.
+   d) Convey the object code by offering access from a designated place (gratis or for
+      a charge), and offer equivalent access to the Corresponding Source in the same way
+      through the same place at no further charge. You need not require recipients to copy
+      the Corresponding Source along with the object code. If the place to copy the object
+      code is a network server, the Corresponding Source may be on a different server
+      (operated by you or a third party) that supports equivalent copying facilities,
+      provided you maintain clear directions next to the object code saying where to find
+      the Corresponding Source. Regardless of what server hosts the Corresponding Source,
+      you remain obligated to ensure that it is available for as long as needed to satisfy
+      these requirements.
+   e) Convey the object code using peer-to-peer transmission, provided you inform
+      other peers where the object code and Corresponding Source of the work are being
+      offered to the general public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded from the
+Corresponding Source as a System Library, need not be included in conveying the
+object code work.
+
+A â€œUser Productâ€ is either (1) a â€œconsumer productâ€, which
+means any tangible personal property which is normally used for personal, family, or
+household purposes, or (2) anything designed or sold for incorporation into a
+dwelling. In determining whether a product is a consumer product, doubtful cases
+shall be resolved in favor of coverage. For a particular product received by a
+particular user, â€œnormally usedâ€ refers to a typical or common use of
+that class of product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected to use, the
+product. A product is a consumer product regardless of whether the product has
+substantial commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+â€œInstallation Informationâ€ for a User Product means any methods,
+procedures, authorization keys, or other information required to install and execute
+modified versions of a covered work in that User Product from a modified version of
+its Corresponding Source. The information must suffice to ensure that the continued
+functioning of the modified object code is in no case prevented or interfered with
+solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or specifically for
+use in, a User Product, and the conveying occurs as part of a transaction in which
+the right of possession and use of the User Product is transferred to the recipient
+in perpetuity or for a fixed term (regardless of how the transaction is
+characterized), the Corresponding Source conveyed under this section must be
+accompanied by the Installation Information. But this requirement does not apply if
+neither you nor any third party retains the ability to install modified object code
+on the User Product (for example, the work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a requirement to
+continue to provide support service, warranty, or updates for a work that has been
+modified or installed by the recipient, or for the User Product in which it has been
+modified or installed. Access to a network may be denied when the modification itself
+materially and adversely affects the operation of the network or violates the rules
+and protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord with
+this section must be in a format that is publicly documented (and with an
+implementation available to the public in source code form), and must require no
+special password or key for unpacking, reading or copying.
+
+7. Additional Terms
+^^^^^^^^^^^^^^^^^^^
+
+â€œAdditional permissionsâ€ are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions. Additional
+permissions that are applicable to the entire Program shall be treated as though they
+were included in this License, to the extent that they are valid under applicable
+law. If additional permissions apply only to part of the Program, that part may be
+used separately under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any
+additional permissions from that copy, or from any part of it. (Additional
+permissions may be written to require their own removal in certain cases when you
+modify the work.) You may place additional permissions on material, added by you to a
+covered work, for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you add to a
+covered work, you may (if authorized by the copyright holders of that material)
+supplement the terms of this License with terms:
+
+   a) Disclaiming warranty or limiting liability differently from the terms of
+      sections 15 and 16 of this License; or
+   b) Requiring preservation of specified reasonable legal notices or author
+      attributions in that material or in the Appropriate Legal Notices displayed by works
+      containing it; or
+   c) Prohibiting misrepresentation of the origin of that material, or requiring that
+      modified versions of such material be marked in reasonable ways as different from the
+      original version; or
+   d) Limiting the use for publicity purposes of names of licensors or authors of the
+      material; or
+   e) Declining to grant rights under trademark law for use of some trade names,
+      trademarks, or service marks; or
+   f) Requiring indemnification of licensors and authors of that material by anyone
+      who conveys the material (or modified versions of it) with contractual assumptions of
+      liability to the recipient, for any liability that these contractual assumptions
+      directly impose on those licensors and authors.
+
+All other non-permissive additional terms are considered â€œfurther
+restrictionsâ€ within the meaning of section 10. If the Program as you received
+it, or any part of it, contains a notice stating that it is governed by this License
+along with a term that is a further restriction, you may remove that term. If a
+license document contains a further restriction but permits relicensing or conveying
+under this License, you may add to a covered work material governed by the terms of
+that license document, provided that the further restriction does not survive such
+relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place, in
+the relevant source files, a statement of the additional terms that apply to those
+files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a
+separately written license, or stated as exceptions; the above requirements apply
+either way.
+
+8. Termination
+^^^^^^^^^^^^^^
+
+You may not propagate or modify a covered work except as expressly provided under
+this License. Any attempt otherwise to propagate or modify it is void, and will
+automatically terminate your rights under this License (including any patent licenses
+granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a
+particular copyright holder is reinstated (a) provisionally, unless and until the
+copyright holder explicitly and finally terminates your license, and (b) permanently,
+if the copyright holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated permanently
+if the copyright holder notifies you of the violation by some reasonable means, this
+is the first time you have received notice of violation of this License (for any
+work) from that copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses of
+parties who have received copies or rights from you under this License. If your
+rights have been terminated and not permanently reinstated, you do not qualify to
+receive new licenses for the same material under section 10.
+
+9. Acceptance Not Required for Having Copies
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You are not required to accept this License in order to receive or run a copy of the
+Program. Ancillary propagation of a covered work occurring solely as a consequence of
+using peer-to-peer transmission to receive a copy likewise does not require
+acceptance. However, nothing other than this License grants you permission to
+propagate or modify any covered work. These actions infringe copyright if you do not
+accept this License. Therefore, by modifying or propagating a covered work, you
+indicate your acceptance of this License to do so.
+
+10. Automatic Licensing of Downstream Recipients
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Each time you convey a covered work, the recipient automatically receives a license
+from the original licensors, to run, modify and propagate that work, subject to this
+License. You are not responsible for enforcing compliance by third parties with this
+License.
+
+An â€œentity transactionâ€ is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an organization, or
+merging organizations. If propagation of a covered work results from an entity
+transaction, each party to that transaction who receives a copy of the work also
+receives whatever licenses to the work the party's predecessor in interest had or
+could give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if the predecessor
+has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights granted or
+affirmed under this License. For example, you may not impose a license fee, royalty,
+or other charge for exercise of rights granted under this License, and you may not
+initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging
+that any patent claim is infringed by making, using, selling, offering for sale, or
+importing the Program or any portion of it.
+
+11. Patents
+^^^^^^^^^^^
+
+A â€œcontributorâ€ is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The work thus
+licensed is called the contributor's â€œcontributor versionâ€.
+
+A contributor's â€œessential patent claimsâ€ are all patent claims owned or
+controlled by the contributor, whether already acquired or hereafter acquired, that
+would be infringed by some manner, permitted by this License, of making, using, or
+selling its contributor version, but do not include claims that would be infringed
+only as a consequence of further modification of the contributor version. For
+purposes of this definition, â€œcontrolâ€ includes the right to grant patent
+sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license
+under the contributor's essential patent claims, to make, use, sell, offer for sale,
+import and otherwise run, modify and propagate the contents of its contributor
+version.
+
+In the following three paragraphs, a â€œpatent licenseâ€ is any express
+agreement or commitment, however denominated, not to enforce a patent (such as an
+express permission to practice a patent or covenant not to sue for patent
+infringement). To â€œgrantâ€ such a patent license to a party means to make
+such an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the
+Corresponding Source of the work is not available for anyone to copy, free of charge
+and under the terms of this License, through a publicly available network server or
+other readily accessible means, then you must either (1) cause the Corresponding
+Source to be so available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner consistent with
+the requirements of this License, to extend the patent license to downstream
+recipients. â€œKnowingly relyingâ€ means you have actual knowledge that, but
+for the patent license, your conveying the covered work in a country, or your
+recipient's use of the covered work in a country, would infringe one or more
+identifiable patents in that country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you
+convey, or propagate by procuring conveyance of, a covered work, and grant a patent
+license to some of the parties receiving the covered work authorizing them to use,
+propagate, modify or convey a specific copy of the covered work, then the patent
+license you grant is automatically extended to all recipients of the covered work and
+works based on it.
+
+A patent license is â€œdiscriminatoryâ€ if it does not include within the
+scope of its coverage, prohibits the exercise of, or is conditioned on the
+non-exercise of one or more of the rights that are specifically granted under this
+License. You may not convey a covered work if you are a party to an arrangement with
+a third party that is in the business of distributing software, under which you make
+payment to the third party based on the extent of your activity of conveying the
+work, and under which the third party grants, to any of the parties who would receive
+the covered work from you, a discriminatory patent license (a) in connection with
+copies of the covered work conveyed by you (or copies made from those copies), or (b)
+primarily for and in connection with specific products or compilations that contain
+the covered work, unless you entered into that arrangement, or that patent license
+was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied
+license or other defenses to infringement that may otherwise be available to you
+under applicable patent law.
+
+12. No Surrender of Others' Freedom
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If conditions are imposed on you (whether by court order, agreement or otherwise)
+that contradict the conditions of this License, they do not excuse you from the
+conditions of this License. If you cannot convey a covered work so as to satisfy
+simultaneously your obligations under this License and any other pertinent
+obligations, then as a consequence you may not convey it at all. For example, if you
+agree to terms that obligate you to collect a royalty for further conveying from
+those to whom you convey the Program, the only way you could satisfy both those terms
+and this License would be to refrain entirely from conveying the Program.
+
+13. Use with the GNU Affero General Public License
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Notwithstanding any other provision of this License, you have permission to link or
+combine any covered work with a work licensed under version 3 of the GNU Affero
+General Public License into a single combined work, and to convey the resulting work.
+The terms of this License will continue to apply to the part which is the covered
+work, but the special requirements of the GNU Affero General Public License, section
+13, concerning interaction through a network will apply to the combination as such.
+
+14. Revised Versions of this License
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Free Software Foundation may publish revised and/or new versions of the GNU
+General Public License from time to time. Such new versions will be similar in spirit
+to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies that
+a certain numbered version of the GNU General Public License â€œor any later
+versionâ€ applies to it, you have the option of following the terms and
+conditions either of that numbered version or of any later version published by the
+Free Software Foundation. If the Program does not specify a version number of the GNU
+General Public License, you may choose any version ever published by the Free
+Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU
+General Public License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions. However, no
+additional obligations are imposed on any author or copyright holder as a result of
+your choosing to follow a later version.
+
+15. Disclaimer of Warranty
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM â€œAS ISâ€ WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
+COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
+OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
+WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the disclaimer of warranty and limitation of liability provided above cannot be
+given local legal effect according to their terms, reviewing courts shall apply local
+law that most closely approximates an absolute waiver of all civil liability in
+connection with the Program, unless a warranty or assumption of liability accompanies
+a copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+---------------------------------------------
+
+If you develop a new program, and you want it to be of the greatest possible use to
+the public, the best way to achieve this is to make it free software which everyone
+can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to attach them
+to the start of each source file to most effectively state the exclusion of warranty;
+and each file should have at least the â€œcopyrightâ€ line and a pointer to
+where the full notice is found.
+
+.. pull-quote::
+
+   <one line to give the program's name and a brief idea of what it does.>
+   Copyright (C) <year>  <name of author>
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short notice like this
+when it starts in an interactive mode:
+
+.. pull-quote::
+
+   <program>  Copyright (C) <year>  <name of author>
+   This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
+   This is free software, and you are welcome to redistribute it
+   under certain conditions; type 'show c' for details.
+
+The hypothetical commands `show w` and `show c` should show the appropriate parts of
+the General Public License. Of course, your program's commands might be different;
+for a GUI interface, you would use an â€œabout boxâ€.
+
+You should also get your employer (if you work as a programmer) or school, if any, to
+sign a â€œcopyright disclaimerâ€ for the program, if necessary. For more
+information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+The GNU General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may consider it
+more useful to permit linking proprietary applications with the library. If this is
+what you want to do, use the GNU Lesser General Public License instead of this
+License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
\ No newline at end of file
diff --git a/README.md b/README.md
index edffae2ee053b82ba7c24a03f9d5aff5237c22f0..f9576bc0de46b17f0ff1e1a67f2196f81f32b9f0 100644
--- a/README.md
+++ b/README.md
@@ -2,4 +2,4 @@
 
 Python bindings for `evalhyd` utility
 
-Documentation: https://hycar-hydro.gitlab.irstea.page/evalhyd/evalhyd-docs/python
+Documentation: https://hydrogr.github.io/evalhyd/python
diff --git a/changelog.rst b/changelog.rst
new file mode 100644
index 0000000000000000000000000000000000000000..91f4f5149d4f45b83ceb83ba7554ad97f89c0061
--- /dev/null
+++ b/changelog.rst
@@ -0,0 +1,14 @@
+.. default-role:: obj
+
+..
+   latest
+   ------
+
+   Yet to be versioned and released. Only available from *dev* branch until then.
+
+v0.1.0.0
+--------
+
+Released on 2023-04-14.
+
+* first release
diff --git a/deps/evalhyd b/deps/evalhyd
deleted file mode 160000
index 592caff2eafc89a3cad9da4d47bb1a2eb777b2eb..0000000000000000000000000000000000000000
--- a/deps/evalhyd
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 592caff2eafc89a3cad9da4d47bb1a2eb777b2eb
diff --git a/deps/evalhyd-cpp b/deps/evalhyd-cpp
new file mode 160000
index 0000000000000000000000000000000000000000..d86043f5a28b697a902cc52842ed439583f5f42d
--- /dev/null
+++ b/deps/evalhyd-cpp
@@ -0,0 +1 @@
+Subproject commit d86043f5a28b697a902cc52842ed439583f5f42d
diff --git a/deps/xtensor b/deps/xtensor
new file mode 160000
index 0000000000000000000000000000000000000000..e534928cc30eb3a4a05539747d98e1d6868c2d62
--- /dev/null
+++ b/deps/xtensor
@@ -0,0 +1 @@
+Subproject commit e534928cc30eb3a4a05539747d98e1d6868c2d62
diff --git a/deps/xtensor-python b/deps/xtensor-python
index 6544a559ae98953394a1c51d8a637b71882af8da..6a286681c48c35d3df342f291938f4825b20c0a3 160000
--- a/deps/xtensor-python
+++ b/deps/xtensor-python
@@ -1 +1 @@
-Subproject commit 6544a559ae98953394a1c51d8a637b71882af8da
+Subproject commit 6a286681c48c35d3df342f291938f4825b20c0a3
diff --git a/deps/xtl b/deps/xtl
new file mode 160000
index 0000000000000000000000000000000000000000..fea39142693fbbc2ef19d75012bc6b46ef0a5f8c
--- /dev/null
+++ b/deps/xtl
@@ -0,0 +1 @@
+Subproject commit fea39142693fbbc2ef19d75012bc6b46ef0a5f8c
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b09e246d273533a54e0e5246739099c3f3c73370
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,19 @@
+channels:
+  - conda-forge
+dependencies:
+  # Build dependencies
+  - cxx-compiler
+  - c-compiler
+  - cmake
+  - make
+  - scikit-build
+  # Host dependencies
+  - python
+  - numpy
+  - pybind11
+  - xtl==0.7.5
+  - xtensor==0.24.6
+  - xtensor-python==0.26.1
+#  - evalhyd-cpp==0.1.0
+  # Test dependencies
+  - numpy
diff --git a/evalhyd/__init__.py b/evalhyd/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..14bf7295b8f55f8621f5a51bba0ea5acadbd3858
--- /dev/null
+++ b/evalhyd/__init__.py
@@ -0,0 +1,5 @@
+"""An evaluator for determinist and probabilist streamflow predictions."""
+
+from .version import __version__
+from .evald import evald
+from .evalp import evalp
diff --git a/evalhyd/evald.py b/evalhyd/evald.py
new file mode 100644
index 0000000000000000000000000000000000000000..80d56aab777b4eba5e765b801640461f16ad73a2
--- /dev/null
+++ b/evalhyd/evald.py
@@ -0,0 +1,80 @@
+from typing import List, Dict
+from numpy import dtype
+from numpy.typing import NDArray
+
+try:
+    from ._evalhyd import _evald
+except ImportError:
+    pass
+
+
+def evald(q_obs: NDArray[dtype('float64')],
+          q_prd: NDArray[dtype('float64')],
+          metrics: List[str],
+          q_thr: NDArray[dtype('float64')] = None,
+          events: str = None,
+          transform: str = None,
+          exponent: float = None,
+          epsilon: float = None,
+          t_msk: NDArray[dtype('bool')] = None,
+          m_cdt: NDArray[dtype('|S32')] = None,
+          bootstrap: Dict[str, int] = None,
+          dts: NDArray[dtype('|S32')] = None,
+          seed: int = None,
+          diagnostics: List[str] = None) -> List[NDArray[dtype('float64')]]:
+    """Function to evaluate deterministic streamflow predictions"""
+
+    # required arguments
+    kwargs = {
+        # convert 1D array into 2D array view
+        'q_obs': q_obs.reshape(1, q_obs.size) if q_obs.ndim == 1 else q_obs,
+        'q_prd': q_prd.reshape(1, q_prd.size) if q_prd.ndim == 1 else q_prd,
+        'metrics': metrics
+    }
+
+    # optional arguments
+    if q_thr is not None:
+        kwargs['q_thr'] = (
+            q_thr.reshape(1, q_thr.size) if q_thr.ndim == 1 else q_thr
+        )
+    if events is not None:
+        kwargs['events'] = events
+    if transform is not None:
+        kwargs['transform'] = transform
+    if exponent is not None:
+        kwargs['exponent'] = exponent
+    if epsilon is not None:
+        kwargs['epsilon'] = epsilon
+    if t_msk is not None:
+        kwargs['t_msk'] = t_msk
+    if m_cdt is not None:
+        kwargs['m_cdt'] = m_cdt
+    if bootstrap is not None:
+        kwargs['bootstrap'] = bootstrap
+    if dts is not None:
+        kwargs['dts'] = dts
+    if seed is not None:
+        kwargs['seed'] = seed
+    if diagnostics is not None:
+        kwargs['diagnostics'] = diagnostics
+
+    # check array ranks
+    _expected = {
+        'q_obs': 2,
+        'q_prd': 2,
+        'q_thr': 2,
+        't_msk': 3,
+        'm_cdt': 2,
+        'dts': 1
+    }
+
+    for arg, val in _expected.items():
+        try:
+            if kwargs[arg].ndim != val:
+                raise RuntimeError(
+                    f"'{arg}' must feature {val} {'axis' if val == 1 else 'axes'}"
+                )
+        except KeyError:
+            pass
+
+    return _evald(**kwargs)
diff --git a/evalhyd/evalp.py b/evalhyd/evalp.py
new file mode 100644
index 0000000000000000000000000000000000000000..4639bea84656b00b49dd3766a53443a058a5443c
--- /dev/null
+++ b/evalhyd/evalp.py
@@ -0,0 +1,72 @@
+from typing import List, Dict
+from numpy import dtype
+from numpy.typing import NDArray
+
+try:
+    from ._evalhyd import _evalp
+except ImportError:
+    pass
+
+
+def evalp(q_obs: NDArray[dtype('float64')],
+          q_prd: NDArray[dtype('float64')],
+          metrics: List[str],
+          q_thr: NDArray[dtype('float64')] = None,
+          events: str = None,
+          c_lvl: NDArray[dtype('float64')] = None,
+          t_msk: NDArray[dtype('bool')] = None,
+          m_cdt: NDArray[dtype('|S32')] = None,
+          bootstrap: Dict[str, int] = None,
+          dts: NDArray[dtype('|S32')] = None,
+          seed: int = None,
+          diagnostics: List[str] = None) -> List[NDArray[dtype('float64')]]:
+    """Function to evaluate probabilistic streamflow predictions"""
+
+    # required arguments
+    kwargs = {
+        'q_obs': q_obs,
+        'q_prd': q_prd,
+        'metrics': metrics
+    }
+
+    # optional arguments
+    if q_thr is not None:
+        kwargs['q_thr'] = q_thr
+    if events is not None:
+        kwargs['events'] = events
+    if c_lvl is not None:
+        kwargs['c_lvl'] = c_lvl
+    if t_msk is not None:
+        kwargs['t_msk'] = t_msk
+    if m_cdt is not None:
+        kwargs['m_cdt'] = m_cdt
+    if bootstrap is not None:
+        kwargs['bootstrap'] = bootstrap
+    if dts is not None:
+        kwargs['dts'] = dts
+    if seed is not None:
+        kwargs['seed'] = seed
+    if diagnostics is not None:
+        kwargs['diagnostics'] = diagnostics
+
+    # check array ranks
+    _expected = {
+        'q_obs': 2,
+        'q_prd': 4,
+        'q_thr': 2,
+        'c_lvl': 1,
+        't_msk': 4,
+        'm_cdt': 2,
+        'dts': 1
+    }
+
+    for arg, val in _expected.items():
+        try:
+            if kwargs[arg].ndim != val:
+                raise RuntimeError(
+                    f"'{arg}' must feature {val} {'axis' if val == 1 else 'axes'}"
+                )
+        except KeyError:
+            pass
+
+    return _evalp(**kwargs)
diff --git a/evalhyd/src/evalhyd.cpp b/evalhyd/src/evalhyd.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..47c424d932c41b4dced0ad4bf248039701b64bad
--- /dev/null
+++ b/evalhyd/src/evalhyd.cpp
@@ -0,0 +1,146 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <array>
+#include <optional>
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+#define FORCE_IMPORT_ARRAY
+#include <xtl/xoptional.hpp>
+#include <xtensor/xview.hpp>
+#include <xtensor-python/pytensor.hpp>
+
+#include "evalhyd/evald.hpp"
+#include "evalhyd/evalp.hpp"
+
+namespace py = pybind11;
+using namespace py::literals;
+
+auto evald(
+    const xt::pytensor<double, 2>& q_obs,
+    const xt::pytensor<double, 2>& q_prd,
+    const std::vector<std::string>& metrics,
+    const xt::pytensor<double, 2>& q_thr,
+    std::optional<std::string> events,
+    std::optional<std::string> transform,
+    std::optional<double> exponent,
+    std::optional<double> epsilon,
+    const xt::pytensor<bool, 3>& t_msk,
+    const xt::pytensor<std::array<char, 32>, 2>& m_cdt,
+    std::optional<std::unordered_map<std::string, int>> bootstrap,
+    const std::vector<std::string>& dts,
+    std::optional<int> seed,
+    std::optional<std::vector<std::string>> diagnostics
+)
+{
+    return evalhyd::evald(
+        q_obs,
+        q_prd,
+        metrics,
+        q_thr,
+        (events.has_value()) ? events.value() : xtl::missing<std::string>(),
+        (transform.has_value()) ? transform.value() : xtl::missing<std::string>(),
+        (exponent.has_value()) ? exponent.value() : xtl::missing<double>(),
+        (epsilon.has_value()) ? epsilon.value() : xtl::missing<double>(),
+        t_msk,
+        m_cdt,
+        (bootstrap.has_value())
+        ? bootstrap.value()
+        : xtl::missing<std::unordered_map<std::string, int>>(),
+        dts,
+        (seed.has_value()) ? seed.value() : xtl::missing<int>(),
+        (diagnostics.has_value())
+        ? diagnostics.value()
+        : xtl::missing<std::vector<std::string>>()
+    );
+}
+
+auto evalp(
+    const xt::pytensor<double, 2>& q_obs,
+    const xt::pytensor<double, 4>& q_prd,
+    const std::vector<std::string>& metrics,
+    const xt::pytensor<double, 2>& q_thr,
+    std::optional<std::string> events,
+    const std::vector<double>& c_lvl,
+    const xt::pytensor<bool, 4>& t_msk,
+    const xt::pytensor<std::array<char, 32>, 2>& m_cdt,
+    std::optional<std::unordered_map<std::string, int>> bootstrap,
+    const std::vector<std::string>& dts,
+    std::optional<int> seed,
+    std::optional<std::vector<std::string>> diagnostics
+)
+{
+    return evalhyd::evalp(
+        q_obs,
+        q_prd,
+        metrics,
+        q_thr,
+        (events.has_value()) ? events.value() : xtl::missing<std::string>(),
+        c_lvl,
+        t_msk,
+        m_cdt,
+        (bootstrap.has_value())
+        ? bootstrap.value()
+        : xtl::missing<std::unordered_map<std::string, int>>(),
+        dts,
+        (seed.has_value()) ? seed.value() : xtl::missing<int>(),
+        (diagnostics.has_value())
+        ? diagnostics.value()
+        : xtl::missing<std::vector<std::string>>()
+    );
+}
+
+// Python Module and Docstrings
+PYBIND11_MODULE(_evalhyd, m)
+{
+    xt::import_numpy();
+
+    m.doc() = "Python bindings for the C++ core of evalhyd";
+
+    // deterministic evaluation
+    m.def(
+        "_evald",
+        &evald,
+        "Function to evaluate determinist streamflow predictions (2D)",
+        py::arg("q_obs"),
+        py::arg("q_prd"),
+        py::arg("metrics"),
+        py::arg("q_thr") = xt::pytensor<double, 2>({0}),
+        py::arg("events") = py::none(),
+        py::arg("transform") = py::none(),
+        py::arg("exponent") = py::none(),
+        py::arg("epsilon") = py::none(),
+        py::arg("t_msk") = xt::pytensor<bool, 3>({0}),
+        py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}),
+        py::arg("bootstrap") = py::none(),
+        py::arg("dts") = py::list(),
+        py::arg("seed") = py::none(),
+        py::arg("diagnostics") = py::none()
+    );
+
+    // probabilistic evaluation
+    m.def(
+        "_evalp",
+        &evalp,
+        "Function to evaluate probabilist streamflow predictions",
+        py::arg("q_obs"),
+        py::arg("q_prd"),
+        py::arg("metrics"),
+        py::arg("q_thr") = xt::pytensor<double, 2>({0}),
+        py::arg("events") = py::none(),
+        py::arg("c_lvl") = py::list(),
+        py::arg("t_msk") = xt::pytensor<bool, 4>({0}),
+        py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}),
+        py::arg("bootstrap") = py::none(),
+        py::arg("dts") = py::list(),
+        py::arg("seed") = py::none(),
+        py::arg("diagnostics") = py::none()
+    );
+
+#ifdef VERSION_INFO
+    m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
+#else
+    m.attr("__version__") = "dev";
+#endif
+}
diff --git a/evalhyd/version.py b/evalhyd/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f4f4229e6a5ee0683a3b13e34a2906e7538c79b
--- /dev/null
+++ b/evalhyd/version.py
@@ -0,0 +1,3 @@
+
+
+__version__ = '0.1.0.0'
diff --git a/pyproject.toml b/pyproject.toml
index fd3ad8a0e0588c2bd9c545812d1848eb30f5993c..fbc35a1bba84828279cfa3dd10d09ee62c774f92 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,10 @@
 requires = [
     "setuptools>=42",
     "wheel",
+    "scikit-build",
+    "cmake",
     "pybind11>=2.8.0",
+    "ninja",
     "numpy>1.16",
 ]
 
diff --git a/setup.py b/setup.py
index 74ea01e21404ed01e171352682b52e7b4966952b..cfa265bf529fbc65f7fb736ae73c637598687251 100644
--- a/setup.py
+++ b/setup.py
@@ -1,39 +1,43 @@
 import sys
 import os
 
-from pybind11 import get_cmake_dir
 from pybind11.setup_helpers import Pybind11Extension, build_ext
 from setuptools import setup
 import numpy
 
 
-__version__ = '0.0.1'
+# collect centrally sourced package version
+with open("evalhyd/version.py", 'r') as fv:
+    exec(fv.read())
 
+# vendor dependencies (unless told otherwise via environment variable)
+deps = ['xtl', 'xtensor', 'xtensor-python', 'evalhyd-cpp']
+deps_blank_path = os.path.join(os.getcwd(), 'deps', '{}', 'include')
 
+deps_include_dirs = []
+for dep in deps:
+    if not os.getenv(f"EVALHYD_PYTHON_VENDOR_{dep.upper().replace('-', '_')}") == 'FALSE':
+        # register dependency headers
+        deps_include_dirs.append(deps_blank_path.format(dep))
+        print(f"vendoring {dep}")
+
+# configure Python extension
 ext_modules = [
     Pybind11Extension(
-        "evalhyd",
-        ['src/evalhyd-python.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_brier.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_elements.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_quantiles.cpp'],
+        "evalhyd._evalhyd",
+        ['evalhyd/src/evalhyd.cpp'],
         include_dirs=[
             numpy.get_include(),
-            os.path.join(os.getcwd(), 'deps', 'evalhyd', 'deps', 'xtl',
-                         'include'),
-            os.path.join(os.getcwd(), 'deps', 'evalhyd', 'deps', 'xtensor',
-                         'include'),
-            os.path.join(os.getcwd(), 'deps', 'xtensor-python', 'include'),
-            os.path.join(os.getcwd(), 'deps', 'evalhyd', 'include'),
-            os.path.join(os.getcwd(), 'deps', 'evalhyd', 'src'),
             os.path.join(sys.prefix, 'include'),
-            os.path.join(sys.prefix, 'Library', 'include')
+            os.path.join(sys.prefix, 'Library', 'include'),
+            *deps_include_dirs
         ],
         language='c++',
-        define_macros=[('VERSION_INFO', __version__)],
+        define_macros=[('VERSION_INFO', __version__)]
     ),
 ]
 
+# build Python extension and install Python package
 setup(
     name='evalhyd-python',
     version=__version__,
@@ -42,8 +46,9 @@ setup(
     url='https://gitlab.irstea.fr/hycar-hydro/evalhyd/evalhyd-python',
     description='Python bindings for EvalHyd',
     long_description='An evaluator for streamflow predictions.',
+    packages=["evalhyd"],
     ext_modules=ext_modules,
     cmdclass={'build_ext': build_ext},
-    extras_require={"tests": "numpy>=1.16"},
+    extras_require={'tests': 'numpy>=1.16'},
     zip_safe=False,
 )
diff --git a/src/evalhyd-python.cpp b/src/evalhyd-python.cpp
deleted file mode 100644
index 8cf2c41198db7ea4ccb4b3c57b50e7e364e6c404..0000000000000000000000000000000000000000
--- a/src/evalhyd-python.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <array>
-
-#define STRINGIFY(x) #x
-#define MACRO_STRINGIFY(x) STRINGIFY(x)
-
-#define FORCE_IMPORT_ARRAY
-#include <xtensor/xview.hpp>
-#include <xtensor-python/pytensor.hpp>
-
-#include "evalhyd/evald.hpp"
-#include "evalhyd/evalp.hpp"
-
-namespace py = pybind11;
-using namespace py::literals;
-
-// reshape 1D tensors to 2D tensors
-auto evald_1d(
-    const xt::xtensor<double, 1>& q_obs,
-    const xt::xtensor<double, 1>& q_prd,
-    const std::vector<std::string>& metrics,
-    const std::string& transform = "none",
-    const double exponent = 1,
-    double epsilon = -9,
-    const xt::xtensor<bool, 2>& t_msk = {},
-    const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
-    const std::unordered_map<std::string, int>& bootstrap =
-        {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
-    const std::vector<std::string>& dts = {}
-)
-{
-    return evalhyd::evald(
-        xt::view(q_obs, xt::newaxis(), xt::all()),
-        xt::view(q_prd, xt::newaxis(), xt::all()),
-        metrics,
-        transform,
-        exponent,
-        epsilon,
-        t_msk,
-        m_cdt,
-        bootstrap,
-        dts
-    );
-}
-
-// Python Module and Docstrings
-PYBIND11_MODULE(evalhyd, m)
-{
-    xt::import_numpy();
-
-    m.doc() = R"pbdoc(
-        Utility for evaluation of streamflow predictions.
-    )pbdoc";
-
-    // deterministic evaluation
-    m.def(
-        "evald", evald_1d,
-        R"pbdoc(
-            Function to evaluate deterministic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   1D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (time,)
-
-                q_prd: `numpy.ndarray`
-                   1D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (time,)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                transform: `str`, optional
-                   The transformation to apply to both streamflow observations
-                   and predictions prior to the calculation of the *metrics*.
-
-                exponent: `float`, optional
-                   The value of the exponent n to use when the *transform* is
-                   the power function. If not provided (or set to default value
-                   1), the streamflow observations and predictions remain
-                   untransformed.
-
-                epsilon: `float`, optional
-                   The value of the small constant Îµ to add to both the
-                   streamflow observations and predictions prior to the
-                   calculation of the *metrics* when the *transform* is the
-                   reciprocal function, the natural logarithm, or the power
-                   function with a negative exponent (since none are defined
-                   for 0). If not provided (or set to default value -9),
-                   one hundredth of the mean of the streamflow observations
-                   is used as value for epsilon.
-
-                t_msk: `numpy.ndarray`, optional
-                   1D array of mask(s) used to generate temporal subsets of
-                   the whole streamflow time series (where True/False is used for
-                   the time steps to include/discard in a given subset). If not
-                   provided and neither is *m_cdt*, no subset is performed. If
-                   provided, masks must feature the same number of dimensions as
-                   observations and predictions, and it must broadcastable with
-                   both of them.
-                   shape: (subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   1D array of masking condition(s) to use to generate
-                   temporal subsets. Each condition consists in a string and
-                   can be specified on observed streamflow values/statistics
-                   (mean, median, quantile), or on time indices. If provided
-                   in combination with *t_msk*, the latter takes precedence.
-                   If not provided and neither is *t_msk*, no subset is
-                   performed. If provided, only one condition per time series
-                   of observations can be provided.
-                   shape: (subsets,)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                    The sequence of evaluation metrics computed
-                    in the same order as given in *metrics*.
-                    shape: [(1, subsets, samples), ...]
-        )pbdoc",
-        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
-        py::arg("transform") = "none",
-        py::arg("exponent") = 1,
-        py::arg("epsilon") = -9,
-        py::arg("t_msk") = xt::pytensor<bool, 2>({0}),
-        py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 1>({}),
-        py::arg("bootstrap") =
-            py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
-        py::arg("dts") = py::list()
-    );
-    m.def(
-        "evald", evalhyd::evald,
-        R"pbdoc(
-            Function to evaluate deterministic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   2D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (1, time)
-
-                q_prd: `numpy.ndarray`
-                   2D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (series, time)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                transform: `str`, optional
-                   The transformation to apply to both streamflow observations
-                   and predictions prior to the calculation of the *metrics*.
-
-                exponent: `float`, optional
-                   The value of the exponent n to use when the *transform* is
-                   the power function. If not provided (or set to default value
-                   1), the streamflow observations and predictions remain
-                   untransformed.
-
-                epsilon: `float`, optional
-                   The value of the small constant Îµ to add to both the
-                   streamflow observations and predictions prior to the
-                   calculation of the *metrics* when the *transform* is the
-                   reciprocal function, the natural logarithm, or the power
-                   function with a negative exponent (since none are defined
-                   for 0). If not provided (or set to default value -9),
-                   one hundredth of the mean of the streamflow observations
-                   is used as value for epsilon.
-
-                t_msk: `numpy.ndarray`, optional
-                   2D array of mask(s) used to generate temporal subsets of
-                   the whole streamflow time series (where True/False is used for
-                   the time steps to include/discard in a given subset). If not
-                   provided and neither is *m_cdt*, no subset is performed. If
-                   provided, masks must feature the same number of dimensions as
-                   observations and predictions, and it must broadcastable with
-                   both of them.
-                   shape: (subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   1D array of masking condition(s) to use to generate
-                   temporal subsets. Each condition consists in a string and
-                   can be specified on observed streamflow values/statistics
-                   (mean, median, quantile), or on time indices. If provided
-                   in combination with *t_msk*, the latter takes precedence.
-                   If not provided and neither is *t_msk*, no subset is
-                   performed. If provided, only one condition per time series
-                   of observations can be provided.
-                   shape: (subsets,)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                   The sequence of evaluation metrics computed
-                   in the same order as given in *metrics*.
-                   shape: [(series, subsets, samples), ...]
-        )pbdoc",
-        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
-        py::arg("transform") = "none",
-        py::arg("exponent") = 1,
-        py::arg("epsilon") = -9,
-        py::arg("t_msk") = xt::pytensor<bool, 2>({0}),
-        py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 1>({}),
-        py::arg("bootstrap") =
-            py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
-        py::arg("dts") = py::list()
-    );
-
-    // probabilistic evaluation
-    m.def(
-        "evalp", evalhyd::evalp,
-        R"pbdoc(
-            Function to evaluate probabilistic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   2D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (sites, time)
-
-                q_prd: `numpy.ndarray`
-                   4D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (sites, lead times, members, time)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                q_thr: `List[float]`, optional
-                   The streamflow threshold(s) to consider for the *metrics*
-                   assessing the prediction of exceedance events. If not
-                   provided, set to default value as an empty `list`.
-                   shape: (thresholds,)
-
-                t_msk: `numpy.ndarray`, optional
-                   4D array of masks to generate temporal subsets of the whole
-                   streamflow time series (where True/False is used for the
-                   time steps to include/discard in a given subset). If not
-                   provided, no subset is performed and only one set of metrics
-                   is returned corresponding to the whole time series. If
-                   provided, as many sets of metrics are returned as they are
-                   masks provided.
-                   shape: (sites, lead times, subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   2D array of conditions to generate temporal subsets. Each
-                   condition consists in a string and can be specified on
-                   observed/predicted streamflow values/statistics (mean,
-                   median, quantile), or on time indices. If provided in
-                   combination with t_msk, the latter takes precedence. If not
-                   provided and neither is t_msk, no subset is performed and
-                   only one set of metrics is returned corresponding to the
-                   whole time series. If provided, as many sets of metrics are
-                   returned as they are conditions provided.
-                   shape: (sites, subsets)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                   The sequence of evaluation metrics computed
-                   in the same order as given in *metrics*.
-                   shape: [(sites, lead times, subsets, samples, {quantiles,} {thresholds,} {components}), ...]
-        )pbdoc",
-        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
-        py::arg("q_thr") = xt::pytensor<double, 2>({0}),
-        py::arg("t_msk") = xt::pytensor<bool, 4>({0}),
-        py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}),
-        py::arg("bootstrap") =
-            py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
-        py::arg("dts") = py::list()
-    );
-
-#ifdef VERSION_INFO
-    m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
-#else
-    m.attr("__version__") = "dev";
-#endif
-}
diff --git a/tests/expected/evald/CONT_TBL.csv b/tests/expected/evald/CONT_TBL.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2a6370d276de47340b13af813dc3d952d6139c34
--- /dev/null
+++ b/tests/expected/evald/CONT_TBL.csv
@@ -0,0 +1,204 @@
+157.,19.,15.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+157.,19.,15.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+157.,19.,15.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+157.,19.,15.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,19.,14.,120.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,21.,14.,118.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,21.,14.,118.
+200.,15.,8.,88.
+220.,21.,6.,64.
+nan,nan,nan,nan
+158.,21.,14.,118.
+200.,16.,8.,87.
+221.,21.,5.,64.
+nan,nan,nan,nan
diff --git a/tests/expected/evald/KGE.csv b/tests/expected/evald/KGE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..65b1265ba054dbeee0465666a960c4b6c07944c3
--- /dev/null
+++ b/tests/expected/evald/KGE.csv
@@ -0,0 +1,51 @@
+0.7480876678384525
+0.74610619665192
+0.7441110304778197
+0.7430108522656984
+0.7417677706194681
+0.740519915124128
+0.7396393314765528
+0.7391812106418076
+0.7385521156240031
+0.7374975605864584
+0.736478762920044
+0.7356032352557134
+0.7349262719558889
+0.7341531483736209
+0.7335193136927298
+0.732498016247827
+0.7316031283668971
+0.7311620062353068
+0.7304853804554484
+0.7298301318606002
+0.7291682672297097
+0.7284933080332816
+0.7278420198262487
+0.7273338548948837
+0.7266696338186898
+0.7261028872180326
+0.7255515136947399
+0.7249203100577184
+0.724129099815763
+0.7235915471922136
+0.723195030128365
+0.7223157825504646
+0.7214401411915639
+0.7203988937539173
+0.7197737983854688
+0.7188157660001235
+0.7176518268945717
+0.716230562324343
+0.7149933138365094
+0.7133901818967825
+0.7126230134351779
+0.711672543996632
+0.7101399598194
+0.7086263896776204
+0.7068405183946846
+0.7050500737470602
+0.7031816136500466
+0.7006732963875493
+0.6961818766730593
+0.6916216736996625
+0.6764337637969222
diff --git a/tests/expected/evald/KGEPRIME.csv b/tests/expected/evald/KGEPRIME.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b3ab57461cefacf8d86dc335cc9a7bb86c323507
--- /dev/null
+++ b/tests/expected/evald/KGEPRIME.csv
@@ -0,0 +1,51 @@
+0.8131407494929581
+0.8127748549543973
+0.8120324184047302
+0.8117867087643632
+0.8113865804825375
+0.8110546552600805
+0.8110115513420257
+0.8109282579595157
+0.8107808794738488
+0.8102714601853906
+0.8098723493335118
+0.80972964340455
+0.8096359354316549
+0.8093267143355573
+0.8091370309262083
+0.80876520882257
+0.808297843099951
+0.8082504172955576
+0.8079588362318786
+0.8077792859196675
+0.8075824101888797
+0.8072872435440667
+0.8071551289052044
+0.8070989073590911
+0.8068619291248411
+0.8067928045232984
+0.8066998263051663
+0.806455314511177
+0.806108392682091
+0.8060009627097642
+0.8059219799600571
+0.8056200993568368
+0.805444123201897
+0.805182363569416
+0.8049830239354914
+0.8048140123227603
+0.8043601618202886
+0.8037118677795622
+0.8035729804192682
+0.8027506521878371
+0.8024589351470541
+0.8021804568386013
+0.8017099458589753
+0.8013131114124993
+0.8004747893474917
+0.7998057696216888
+0.7995518852898957
+0.7986597353849383
+0.7971020515928053
+0.7958136756510419
+0.7899796163833354
diff --git a/tests/expected/evald/KGEPRIME_D.csv b/tests/expected/evald/KGEPRIME_D.csv
new file mode 100644
index 0000000000000000000000000000000000000000..558a3361645bc094d86d5c9b20fc189c013d9873
--- /dev/null
+++ b/tests/expected/evald/KGEPRIME_D.csv
@@ -0,0 +1,51 @@
+0.9071248643948864,1.1477333530243923,1.0668239858924582
+0.9077537938998346,1.1478429758547268,1.0684568980300118
+0.9080496949392161,1.1484028026509738,1.0696675053044051
+0.9082556740306482,1.1484209022637022,1.0705581846521615
+0.9084737485198940,1.1486630576436567,1.0713951161082431
+0.9089321810875520,1.1489428726605424,1.0722704389155211
+0.9093310954815347,1.1488135780819770,1.0731457617227991
+0.9094546896430078,1.1488016399441205,1.0735373535050023
+0.9095397983553178,1.1487856134230323,1.0740517976110342
+0.9095735837396087,1.1491999363901180,1.0745611228702399
+0.9096311356178044,1.1494305824532358,1.0751830627596215
+0.9101044432964827,1.1495392868849328,1.0758920230450484
+0.9102441911392056,1.1494163423692945,1.0765318788983218
+0.9102574967893201,1.1495752574168396,1.0770053722297441
+0.9103829299622012,1.1496597741688170,1.0774558307504485
+0.9104389606151662,1.1498193287039202,1.0781263996846790
+0.9103708026184928,1.1501613638087465,1.0785359074307741
+0.9106545061768646,1.1501897035858120,1.0789198209427380
+0.9106680754546872,1.1503679937353128,1.0793037344547021
+0.9107871131080774,1.1504053916036157,1.0798002625968421
+0.9109331668775991,1.1504930666773272,1.0802711965048515
+0.9109503931900584,1.1506796793881644,1.0806474317465762
+0.9112916063517488,1.1507819870125124,1.0811465193121295
+0.9116384611328783,1.1508213892258903,1.0815841807157685
+0.9116240546641530,1.1508649226798111,1.0820474363535382
+0.9117728671872545,1.1507388089259101,1.0826002718107666
+0.9119101939393581,1.1506625843082945,1.0831019187997328
+0.9119150575215442,1.1507542944710545,1.0835088671224147
+0.9119108416265477,1.1509472994224532,1.0839593256431190
+0.9120349545371204,1.1508964568748929,1.0844277001277152
+0.9120974194539910,1.1508349399490534,1.0847834599821353
+0.9121099264543083,1.1508757330670814,1.0854130781417561
+0.9125099729643541,1.1509349546260605,1.0861169195803568
+0.9129864907072712,1.1511152707612748,1.0868719494872194
+0.9130727481787702,1.1511623991072226,1.0873224080079240
+0.9137178424599028,1.1513164779920453,1.0880697596445472
+0.9137419646569960,1.1514764306974452,1.0888222301279968
+0.9136897426066677,1.1518176713888661,1.0896156513860558
+0.9144254206019433,1.1517387012476892,1.0907520353814693
+0.9142984381688551,1.1522701343488451,1.0915224218288104
+0.9142709614317646,1.1523150149572656,1.0920496630519076
+0.9144137451984202,1.1523110423287639,1.0927842175714655
+0.9149046185657690,1.1525453982271228,1.0938489377113123
+0.9155452591872579,1.1527254670882050,1.0949674057428342
+0.9155186197874120,1.1532046716768467,1.0959246300993313
+0.9154368750377250,1.1532273515892346,1.0972017823824649
+0.9167750313811954,1.1532539657633949,1.0988244568263661
+0.9169078074034277,1.1533196303650488,1.1006314097560101
+0.9175076075676912,1.1535549186612073,1.1038434861394426
+0.9179463597643975,1.1527797412590994,1.1077849981956065
+0.9179618893753376,1.1523899554203556,1.1189773567810644
diff --git a/tests/expected/evald/KGE_D.csv b/tests/expected/evald/KGE_D.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7d65cb59f4074a5036b38bfead4a934248bdf974
--- /dev/null
+++ b/tests/expected/evald/KGE_D.csv
@@ -0,0 +1,51 @@
+0.9071248643948864,1.2244294704151979,1.0668239858924582
+0.9077537938998346,1.2264207454072791,1.0684568980300118
+0.9080496949392161,1.2284091609962542,1.0696675053044051
+0.9082556740306482,1.2294513963440263,1.0705581846521615
+0.9084737485198940,1.2306719900133749,1.0713951161082431
+0.9089321810875520,1.2319774783565793,1.0722704389155211
+0.9093310954815347,1.2328444223282775,1.0731457617227991
+0.9094546896430078,1.2332814722478178,1.0735373535050023
+0.9095397983553178,1.2338552531667024,1.0740517976110342
+0.9095735837396087,1.2348855740497733,1.0745611228702399
+0.9096311356178044,1.2358482940716458,1.0751830627596215
+0.9101044432964827,1.2367801489363925,1.0758920230450484
+0.9102441911392056,1.2373833346872534,1.0765318788983218
+0.9102574967893201,1.2380987280203271,1.0770053722297441
+0.9103829299622012,1.2387076270574358,1.0774558307504485
+0.9104389606151662,1.2396505731434120,1.0781263996846790
+0.9103708026184928,1.2404903302072829,1.0785359074307741
+0.9106545061768646,1.2409624690429850,1.0789198209427380
+0.9106680754546872,1.2415964716356864,1.0793037344547021
+0.9107871131080774,1.2422080439464072,1.0798002625968421
+0.9109331668775991,1.2428445217100519,1.0802711965048515
+0.9109503931900584,1.2434790402937934,1.0806474317465762
+0.9112916063517488,1.2441639397456739,1.0811465193121295
+0.9116384611328783,1.2447102094160669,1.0815841807157685
+0.9116240546641530,1.2452904391749027,1.0820474363535382
+0.9117728671872545,1.2457901473263877,1.0826002718107666
+0.9119101939393581,1.2462848529553729,1.0831019187997328
+0.9119150575215442,1.2468524819385858,1.0835088671224147
+0.9119108416265477,1.2475800585327315,1.0839593256431190
+0.9120349545371204,1.2480639978139763,1.0844277001277152
+0.9120974194539910,1.2484067080262666,1.0847834599821353
+0.9121099264543083,1.2491755719869908,1.0854130781417561
+0.9125099729643541,1.2500499275558148,1.0861169195803568
+0.9129864907072712,1.2511148984168152,1.0868719494872194
+0.9130727481787702,1.2516846718054440,1.0873224080079240
+0.9137178424599028,1.2527126434836113,1.0880697596445472
+0.9137419646569960,1.2537531352118179,1.0888222301279968
+0.9136897426066677,1.2550385622883493,1.0896156513860558
+0.9144254206019433,1.2562613326135268,1.0907520353814693
+0.9142984381688551,1.2577286876454601,1.0915224218288104
+0.9142709614317646,1.2583852238137356,1.0920496630519076
+0.9144137451984202,1.2592273207901978,1.0927842175714655
+0.9149046185657690,1.2607105595147998,1.0938489377113123
+0.9155452591872579,1.2621968142312687,1.0949674057428342
+0.9155186197874120,1.2638254032362688,1.0959246300993313
+0.9154368750377250,1.2653231056559175,1.0972017823824649
+0.9167750313811954,1.2672236625128150,1.0988244568263661
+0.9169078074034277,1.2693798106679639,1.1006314097560101
+0.9175076075676912,1.2733440828682880,1.1038434861394426
+0.9179463597643975,1.2770321035906431,1.1077849981956065
+0.9179618893753376,1.2894982662973180,1.1189773567810644
diff --git a/tests/expected/evald/MAE.csv b/tests/expected/evald/MAE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ef9cf0e5aa19cebfd335aaaafd5f73f21178bff5
--- /dev/null
+++ b/tests/expected/evald/MAE.csv
@@ -0,0 +1,51 @@
+265.1929260450160655
+265.6816720257234579
+265.7041800643086731
+265.7041800643086731
+265.8360128617363216
+266.1318327974276485
+266.4019292604501743
+266.3729903536977304
+266.5048231511253789
+266.6816720257234579
+266.6977491961415012
+266.8360128617363216
+267.0482315112540164
+267.3215434083601281
+267.4758842443729918
+267.8617363344051228
+268.0160771704179865
+267.9389067524115831
+268.1511254019292778
+268.1382636655948772
+268.3311897106109427
+268.5144694533761935
+268.4726688102894059
+268.3344051446945286
+268.5369774919614088
+268.6527331189710708
+268.7556270096462754
+268.9260450160771825
+269.0739549839228175
+269.2443729903537246
+269.4147909967845749
+269.7491961414791035
+269.7909967845658912
+269.9099678456591391
+270.0643086816720029
+269.9421221864951690
+270.1864951768488936
+270.6623794212218854
+271.1061093247588474
+271.5852090032154251
+271.9067524115755532
+272.1286173633440626
+272.3279742765273568
+272.6784565916398719
+273.4501607717041907
+274.5530546623793953
+274.8617363344051228
+276.1286173633440626
+278.5176848874597795
+281.2700964630225258
+291.2990353697749129
diff --git a/tests/expected/evald/MARE.csv b/tests/expected/evald/MARE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..bb93762afc8ff3ed5f1bad0080f8eaf37ae81c78
--- /dev/null
+++ b/tests/expected/evald/MARE.csv
@@ -0,0 +1,51 @@
+0.2110884459948862
+0.2114774783536764
+0.2114953943175681
+0.2114953943175681
+0.2116003306775049
+0.2118357976315096
+0.2120507891982094
+0.2120277543874916
+0.2121326907474284
+0.2122734590351485
+0.2122862561522140
+0.2123963113589770
+0.2125652333042412
+0.2127827842943542
+0.2129056366181827
+0.2132127674277538
+0.2133356197515823
+0.2132741935896681
+0.2134431155349323
+0.2134328778412799
+0.2135864432460655
+0.2137323303806118
+0.2136990578762416
+0.2135890026694786
+0.2137502463445035
+0.2138423855873749
+0.2139242871365938
+0.2140599365774878
+0.2141776700544901
+0.2143133194953841
+0.2144489689362780
+0.2147151489712397
+0.2147484214756099
+0.2148431201418944
+0.2149659724657229
+0.2148687143760253
+0.2150632305554205
+0.2154420252205583
+0.2157952256515652
+0.2161765797401161
+0.2164325220814255
+0.2166091222969289
+0.2167678065485407
+0.2170467837005679
+0.2176610453197104
+0.2185389275504014
+0.2187846321980584
+0.2197930450228172
+0.2216946966187457
+0.2238855630603538
+0.2318684046857924
diff --git a/tests/expected/evald/MSE.csv b/tests/expected/evald/MSE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..53a1eb3781e0e67ab67632c05a929f6d1aff72f9
--- /dev/null
+++ b/tests/expected/evald/MSE.csv
@@ -0,0 +1,51 @@
+603782.2604501608293504
+603540.1704180064843968
+604973.1768488745437935
+605519.1061093247262761
+606241.1157556270482019
+605823.9710610932670534
+605116.8520900321891531
+605160.5144694533664733
+605628.1511254019569606
+607006.1800643086899072
+608195.0578778134658933
+607157.1061093247262761
+607415.4598070739302784
+608465.9453376205638051
+608766.6463022507959977
+609964.8456591640133411
+611618.5176848875125870
+610871.5080385851906613
+611795.5273311897180974
+612155.2250803858041763
+612401.4630225080763921
+613310.6237942122388631
+612593.7202572347596288
+611633.2090032154228538
+612660.8906752411276102
+612724.9549839228857309
+612831.2958199357381091
+613728.3408360128523782
+614918.0514469452900812
+615075.4372990353731439
+615330.4244372990215197
+616544.5594855305971578
+615854.5048231511609629
+615046.1800643086899072
+615534.5530546624213457
+613767.3826366559369490
+615365.1704180064843968
+617751.3633440514095128
+615900.7909967845771462
+618968.0353697749087587
+620238.9099678456550464
+620927.2025723472470418
+620784.1286173633998260
+619856.3247588424710557
+622720.4019292604643852
+625799.9421221865341067
+621881.5369774919236079
+624982.4630225080763921
+628774.5691318328026682
+633351.0771704179933295
+656835.5305466237477958
diff --git a/tests/expected/evald/NSE.csv b/tests/expected/evald/NSE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..578721bc7dab93dc5517cb15d48d9d81844d6e7d
--- /dev/null
+++ b/tests/expected/evald/NSE.csv
@@ -0,0 +1,51 @@
+0.7189121923160171
+0.7190248961181289
+0.7183577671505612
+0.7181036125173065
+0.7177674845422075
+0.7179616841657375
+0.7182908798615486
+0.7182705530594651
+0.718052847156156
+0.7174113126846504
+0.7168578365723296
+0.7173410498202125
+0.7172207745500294
+0.7167317262719881
+0.7165917364437786
+0.7160339207336052
+0.7152640620034193
+0.7156118286033213
+0.7151816560490262
+0.7150142005632107
+0.7148995657223857
+0.714476310478086
+0.7148100613295596
+0.715257222533744
+0.7147787904778381
+0.7147489656597577
+0.7146994592160939
+0.7142818444011523
+0.7137279805841595
+0.7136547103888884
+0.7135360024036145
+0.7129707679121519
+0.7132920194045123
+0.7136683309479988
+0.7134409713480524
+0.7142636686863795
+0.7135198265862999
+0.7124089464193454
+0.7132704711081728
+0.7118425308507162
+0.7112508815459708
+0.7109304503708312
+0.710997057734077
+0.7114289921740817
+0.710095635390346
+0.7086619708754944
+0.7104861647677075
+0.7090425441765131
+0.7072771481677261
+0.705146577768962
+0.6942135081069736
diff --git a/tests/expected/evald/RMSE.csv b/tests/expected/evald/RMSE.csv
new file mode 100644
index 0000000000000000000000000000000000000000..42b7512dae33e1eea96a9afcc51236c16e3ba7ce
--- /dev/null
+++ b/tests/expected/evald/RMSE.csv
@@ -0,0 +1,51 @@
+777.0342723780984
+776.8784785396018
+777.8002165394881
+778.1510818018085
+778.6148699810626
+778.3469477431598
+777.8925710469488
+777.9206350711191
+778.2211453856814
+779.1060133668002
+779.8686157794872
+779.2028658246354
+779.3686289600538
+780.0422715068848
+780.2349942820117
+781.0024620058275
+782.0604309673821
+781.5826943059737
+782.1735915582868
+782.4034925026765
+782.5608366271009
+783.1415094312216
+782.6836655106805
+782.0698236111757
+782.7265746576138
+782.7674973987633
+782.8354206472366
+783.4081572437275
+784.1671068381696
+784.267452658234
+784.4299997050719
+785.2035146925481
+784.7639803298513
+784.2487998488162
+784.5601016204319
+783.4330747655832
+784.4521466718072
+785.971604667784
+784.7934702816943
+786.7452162992635
+787.5524807705488
+787.9893416616416
+787.8985522371287
+787.3095482456963
+789.1263535893733
+791.0751810809049
+788.5946594908514
+790.5583236058602
+792.9530686817681
+795.833573789406
+810.4539040233095
diff --git a/tests/expected/evalp/AS.csv b/tests/expected/evalp/AS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2062dbb440cef55fc54fcc1c82abe591701a9b02
--- /dev/null
+++ b/tests/expected/evalp/AS.csv
@@ -0,0 +1 @@
+0.4914810317862
diff --git a/tests/expected/evalp/AW.csv b/tests/expected/evalp/AW.csv
new file mode 100644
index 0000000000000000000000000000000000000000..40622cd35ff86f9f32c117c5d13b4572137f2d13
--- /dev/null
+++ b/tests/expected/evalp/AW.csv
@@ -0,0 +1 @@
+9.2749196141479,31.3215434083601
diff --git a/tests/expected/evalp/AWI.csv b/tests/expected/evalp/AWI.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8b3b7e7f9598b23911dc06f440d71572b99b48e5
--- /dev/null
+++ b/tests/expected/evalp/AWI.csv
@@ -0,0 +1 @@
+0.9821120161733,0.9880951944476
diff --git a/tests/expected/evalp/AWN.csv b/tests/expected/evalp/AWN.csv
new file mode 100644
index 0000000000000000000000000000000000000000..34e29a4d703bbbd371011a5dca898f891df2ab20
--- /dev/null
+++ b/tests/expected/evalp/AWN.csv
@@ -0,0 +1 @@
+0.0073826568351,0.0249313434669
diff --git a/tests/expected/evalp/BS.csv b/tests/expected/evalp/BS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3db5ba184a4650f383600095ea081a04acf97c74
--- /dev/null
+++ b/tests/expected/evalp/BS.csv
@@ -0,0 +1,4 @@
+0.1061513565769
+0.0739562201528
+0.0866918610329
+nan
diff --git a/tests/expected/evalp/BSS.csv b/tests/expected/evalp/BSS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6b26f31c41c6febf5321dbd600f2c38654b819a6
--- /dev/null
+++ b/tests/expected/evalp/BSS.csv
@@ -0,0 +1,4 @@
+0.5705594211361
+0.6661165249535
+0.5635125720476
+nan
diff --git a/tests/expected/evalp/BS_CRD.csv b/tests/expected/evalp/BS_CRD.csv
new file mode 100644
index 0000000000000000000000000000000000000000..9fa242b1a1bf2d7e205232c2eb70a407319e2295
--- /dev/null
+++ b/tests/expected/evalp/BS_CRD.csv
@@ -0,0 +1,4 @@
+0.0114117580190,0.1524456042419,0.2471852027998
+0.0055324125593,0.1530792786029,0.2215030861964
+0.0101394313199,0.1220600742934,0.1986125040064
+nan,nan,nan
diff --git a/tests/expected/evalp/BS_LBD.csv b/tests/expected/evalp/BS_LBD.csv
new file mode 100644
index 0000000000000000000000000000000000000000..903f1023e8152bb24a3439e5c5695868c6d65bd4
--- /dev/null
+++ b/tests/expected/evalp/BS_LBD.csv
@@ -0,0 +1,4 @@
+0.0121598807967,0.1506234181408,0.2446148939211
+0.0080317462446,0.1473868836293,0.2133113575375
+0.0171912794414,0.1048221425794,0.1743227241709
+nan,nan,nan
diff --git a/tests/expected/evalp/CR.csv b/tests/expected/evalp/CR.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a4a746929f3e5aef516ba7ca4e79cb9982e0f69f
--- /dev/null
+++ b/tests/expected/evalp/CR.csv
@@ -0,0 +1 @@
+0.0064308681672,0.0353697749196
diff --git a/tests/expected/evalp/CRPS_FROM_BS.csv b/tests/expected/evalp/CRPS_FROM_BS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7a155403cfd93a41adafa598edbc99227973ad0a
--- /dev/null
+++ b/tests/expected/evalp/CRPS_FROM_BS.csv
@@ -0,0 +1 @@
+226.5713674310274
diff --git a/tests/expected/evalp/CRPS_FROM_ECDF.csv b/tests/expected/evalp/CRPS_FROM_ECDF.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a639c991d747ee29be096a2354aafdb4d737cb01
--- /dev/null
+++ b/tests/expected/evalp/CRPS_FROM_ECDF.csv
@@ -0,0 +1 @@
+262.615225902479
diff --git a/tests/expected/evalp/CRPS_FROM_QS.csv b/tests/expected/evalp/CRPS_FROM_QS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..87b0d67fdfd3eb30ded9c9a1408f6bd762a9e349
--- /dev/null
+++ b/tests/expected/evalp/CRPS_FROM_QS.csv
@@ -0,0 +1 @@
+252.9569186533230
diff --git a/tests/expected/evalp/CSI.csv b/tests/expected/evalp/CSI.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ed9f3d5d8e6be62028c0a624937bc8b71aeac154
--- /dev/null
+++ b/tests/expected/evalp/CSI.csv
@@ -0,0 +1,52 @@
+0.4469453376206,0.3311897106109,0.2733118971061,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8018018018018,0.7111111111111,nan
diff --git a/tests/expected/evalp/DS.csv b/tests/expected/evalp/DS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..bf3ba37e57fefc74f9d488499775accd02e3a2d0
--- /dev/null
+++ b/tests/expected/evalp/DS.csv
@@ -0,0 +1 @@
+148.7901639344262
diff --git a/tests/expected/evalp/ES.csv b/tests/expected/evalp/ES.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2cb138c9c9c0c1a2213a8ffc4ddca773d845a906
--- /dev/null
+++ b/tests/expected/evalp/ES.csv
@@ -0,0 +1 @@
+587.2254970444062
diff --git a/tests/expected/evalp/FAR.csv b/tests/expected/evalp/FAR.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7f82d19402209cdc1006165e106dc64febbc30d5
--- /dev/null
+++ b/tests/expected/evalp/FAR.csv
@@ -0,0 +1,52 @@
+0.5530546623794,0.6688102893891,0.7266881028939,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0824742268041,0.0724637681159,nan
diff --git a/tests/expected/evalp/POD.csv b/tests/expected/evalp/POD.csv
new file mode 100644
index 0000000000000000000000000000000000000000..80667c12ae32a3ce3b15b0f4ad6f17cefa1a9d74
--- /dev/null
+++ b/tests/expected/evalp/POD.csv
@@ -0,0 +1,52 @@
+1.0000000000000,1.0000000000000,1.0000000000000,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8640776699029,0.7529411764706,nan
diff --git a/tests/expected/evalp/POFD.csv b/tests/expected/evalp/POFD.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d3007cb09d09716c216dece30c97b68dd9752c3b
--- /dev/null
+++ b/tests/expected/evalp/POFD.csv
@@ -0,0 +1,52 @@
+1.0000000000000,1.0000000000000,1.0000000000000,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0221238938053,nan
diff --git a/tests/expected/evalp/QS.csv b/tests/expected/evalp/QS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..10f11b5a009026f2a9fc07994ed57e0854f3e690
--- /dev/null
+++ b/tests/expected/evalp/QS.csv
@@ -0,0 +1 @@
+345.9157803611179,345.0692555033388,343.1293593865944,340.7098689092258,338.2815978233983,335.9735345040806,333.5551570615883,330.3324264160278,327.3335394509029,324.3259955478602,321.1900816225579,318.1751174870145,315.1221864951768,311.9720504575810,308.6449418748451,305.6121691813011,302.1695523126391,298.4459559732869,294.9746475389559,291.2738065792731,287.7245857036857,284.1019045263419,280.2355923818945,276.2186495176851,272.5014840465003,268.6527331189711,264.7401681919366,260.8558001484045,256.9032896364086,252.9262923571603,248.9312391788272,244.9863962404153,240.6629977739305,236.3289636408610,232.0897848132574,227.3870887954491,222.9760079149148,218.6999752658918,214.0996784565916,209.6725204056392,205.1895869403907,200.3957457333661,195.2372000989366,190.0801385110065,185.3842443729902,180.6178580262183,174.5832302745488,169.1540934949294,163.1109324758844,156.2747959436064,147.5753153598814
diff --git a/tests/expected/evalp/RANK_HIST.csv b/tests/expected/evalp/RANK_HIST.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c35d0094a8725513a0e888bd151ab70f0fcb9b60
--- /dev/null
+++ b/tests/expected/evalp/RANK_HIST.csv
@@ -0,0 +1 @@
+0.6077170418006,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0064308681672,0.0000000000000,0.0000000000000,0.0032154340836,0.0064308681672,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0032154340836,0.0032154340836,0.0032154340836,0.0000000000000,0.0064308681672,0.3440514469453
diff --git a/tests/expected/evalp/REL_DIAG.csv b/tests/expected/evalp/REL_DIAG.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3568332be044fda594e68d46f3c9ac93bfad0af4
--- /dev/null
+++ b/tests/expected/evalp/REL_DIAG.csv
@@ -0,0 +1,208 @@
+0.0000000000000,0.1060606060606,132.0000000000000
+0.0196078431373,0.0000000000000,0.0000000000000
+0.0392156862745,0.0000000000000,0.0000000000000
+0.0588235294118,0.0000000000000,2.0000000000000
+0.0784313725490,0.0000000000000,0.0000000000000
+0.0980392156863,0.0000000000000,0.0000000000000
+0.1176470588235,0.0000000000000,0.0000000000000
+0.1372549019608,0.0000000000000,0.0000000000000
+0.1568627450980,0.0000000000000,0.0000000000000
+0.1764705882353,0.0000000000000,0.0000000000000
+0.1960784313725,0.0000000000000,0.0000000000000
+0.2156862745098,0.0000000000000,0.0000000000000
+0.2352941176471,0.0000000000000,0.0000000000000
+0.2549019607843,0.0000000000000,0.0000000000000
+0.2745098039216,0.0000000000000,0.0000000000000
+0.2941176470588,0.0000000000000,0.0000000000000
+0.3137254901961,0.0000000000000,0.0000000000000
+0.3333333333333,0.0000000000000,0.0000000000000
+0.3529411764706,0.0000000000000,0.0000000000000
+0.3725490196078,0.0000000000000,0.0000000000000
+0.3921568627451,0.0000000000000,0.0000000000000
+0.4117647058824,0.0000000000000,0.0000000000000
+0.4313725490196,0.0000000000000,0.0000000000000
+0.4509803921569,0.0000000000000,0.0000000000000
+0.4705882352941,0.0000000000000,0.0000000000000
+0.4901960784314,0.0000000000000,0.0000000000000
+0.5098039215686,0.0000000000000,0.0000000000000
+0.5294117647059,0.0000000000000,0.0000000000000
+0.5490196078431,0.0000000000000,0.0000000000000
+0.5686274509804,0.0000000000000,0.0000000000000
+0.5882352941176,0.0000000000000,0.0000000000000
+0.6078431372549,0.0000000000000,0.0000000000000
+0.6274509803922,0.0000000000000,0.0000000000000
+0.6470588235294,0.0000000000000,0.0000000000000
+0.6666666666667,0.0000000000000,0.0000000000000
+0.6862745098039,0.0000000000000,0.0000000000000
+0.7058823529412,0.0000000000000,0.0000000000000
+0.7254901960784,0.0000000000000,0.0000000000000
+0.7450980392157,0.0000000000000,0.0000000000000
+0.7647058823529,0.0000000000000,0.0000000000000
+0.7843137254902,0.0000000000000,0.0000000000000
+0.8039215686275,0.0000000000000,0.0000000000000
+0.8235294117647,0.0000000000000,0.0000000000000
+0.8431372549020,0.0000000000000,0.0000000000000
+0.8627450980392,0.0000000000000,0.0000000000000
+0.8823529411765,0.0000000000000,0.0000000000000
+0.9019607843137,0.0000000000000,0.0000000000000
+0.9215686274510,1.0000000000000,1.0000000000000
+0.9411764705882,0.0000000000000,0.0000000000000
+0.9607843137255,0.0000000000000,0.0000000000000
+0.9803921568627,0.0000000000000,0.0000000000000
+1.0000000000000,0.8920454545455,176.0000000000000
+0.0000000000000,0.0842105263158,95.0000000000000
+0.0196078431373,0.0000000000000,1.0000000000000
+0.0392156862745,0.0000000000000,0.0000000000000
+0.0588235294118,0.0000000000000,0.0000000000000
+0.0784313725490,0.0000000000000,0.0000000000000
+0.0980392156863,0.0000000000000,0.0000000000000
+0.1176470588235,0.0000000000000,0.0000000000000
+0.1372549019608,0.0000000000000,0.0000000000000
+0.1568627450980,0.0000000000000,0.0000000000000
+0.1764705882353,0.0000000000000,0.0000000000000
+0.1960784313725,0.0000000000000,0.0000000000000
+0.2156862745098,0.0000000000000,0.0000000000000
+0.2352941176471,0.0000000000000,0.0000000000000
+0.2549019607843,0.0000000000000,0.0000000000000
+0.2745098039216,0.0000000000000,0.0000000000000
+0.2941176470588,0.0000000000000,0.0000000000000
+0.3137254901961,0.0000000000000,0.0000000000000
+0.3333333333333,0.0000000000000,0.0000000000000
+0.3529411764706,0.0000000000000,0.0000000000000
+0.3725490196078,0.0000000000000,0.0000000000000
+0.3921568627451,0.0000000000000,0.0000000000000
+0.4117647058824,0.0000000000000,0.0000000000000
+0.4313725490196,0.0000000000000,0.0000000000000
+0.4509803921569,0.0000000000000,0.0000000000000
+0.4705882352941,0.0000000000000,0.0000000000000
+0.4901960784314,0.0000000000000,0.0000000000000
+0.5098039215686,0.0000000000000,0.0000000000000
+0.5294117647059,0.0000000000000,0.0000000000000
+0.5490196078431,0.0000000000000,0.0000000000000
+0.5686274509804,0.0000000000000,0.0000000000000
+0.5882352941176,0.0000000000000,0.0000000000000
+0.6078431372549,0.0000000000000,0.0000000000000
+0.6274509803922,0.0000000000000,0.0000000000000
+0.6470588235294,0.0000000000000,0.0000000000000
+0.6666666666667,0.0000000000000,0.0000000000000
+0.6862745098039,0.0000000000000,0.0000000000000
+0.7058823529412,0.0000000000000,0.0000000000000
+0.7254901960784,0.0000000000000,0.0000000000000
+0.7450980392157,0.0000000000000,0.0000000000000
+0.7647058823529,0.0000000000000,0.0000000000000
+0.7843137254902,0.0000000000000,0.0000000000000
+0.8039215686275,0.0000000000000,0.0000000000000
+0.8235294117647,0.0000000000000,0.0000000000000
+0.8431372549020,0.0000000000000,0.0000000000000
+0.8627450980392,0.0000000000000,0.0000000000000
+0.8823529411765,0.0000000000000,0.0000000000000
+0.9019607843137,0.0000000000000,0.0000000000000
+0.9215686274510,0.0000000000000,0.0000000000000
+0.9411764705882,0.0000000000000,0.0000000000000
+0.9607843137255,0.0000000000000,0.0000000000000
+0.9803921568627,0.0000000000000,0.0000000000000
+1.0000000000000,0.9302325581395,215.0000000000000
+0.0000000000000,0.0724637681159,69.0000000000000
+0.0196078431373,1.0000000000000,1.0000000000000
+0.0392156862745,0.0000000000000,0.0000000000000
+0.0588235294118,0.0000000000000,0.0000000000000
+0.0784313725490,0.0000000000000,0.0000000000000
+0.0980392156863,0.0000000000000,0.0000000000000
+0.1176470588235,0.0000000000000,0.0000000000000
+0.1372549019608,0.0000000000000,0.0000000000000
+0.1568627450980,0.0000000000000,0.0000000000000
+0.1764705882353,0.0000000000000,0.0000000000000
+0.1960784313725,0.0000000000000,0.0000000000000
+0.2156862745098,0.0000000000000,0.0000000000000
+0.2352941176471,0.0000000000000,0.0000000000000
+0.2549019607843,0.0000000000000,0.0000000000000
+0.2745098039216,0.0000000000000,0.0000000000000
+0.2941176470588,0.0000000000000,0.0000000000000
+0.3137254901961,0.0000000000000,0.0000000000000
+0.3333333333333,0.0000000000000,0.0000000000000
+0.3529411764706,0.0000000000000,0.0000000000000
+0.3725490196078,0.0000000000000,0.0000000000000
+0.3921568627451,0.0000000000000,0.0000000000000
+0.4117647058824,0.0000000000000,0.0000000000000
+0.4313725490196,0.0000000000000,0.0000000000000
+0.4509803921569,0.0000000000000,0.0000000000000
+0.4705882352941,0.0000000000000,0.0000000000000
+0.4901960784314,0.0000000000000,0.0000000000000
+0.5098039215686,0.0000000000000,0.0000000000000
+0.5294117647059,0.0000000000000,0.0000000000000
+0.5490196078431,0.0000000000000,0.0000000000000
+0.5686274509804,0.0000000000000,0.0000000000000
+0.5882352941176,0.0000000000000,0.0000000000000
+0.6078431372549,0.0000000000000,0.0000000000000
+0.6274509803922,0.0000000000000,0.0000000000000
+0.6470588235294,0.0000000000000,0.0000000000000
+0.6666666666667,0.0000000000000,0.0000000000000
+0.6862745098039,0.0000000000000,0.0000000000000
+0.7058823529412,0.0000000000000,0.0000000000000
+0.7254901960784,0.0000000000000,0.0000000000000
+0.7450980392157,0.0000000000000,0.0000000000000
+0.7647058823529,0.0000000000000,0.0000000000000
+0.7843137254902,0.0000000000000,0.0000000000000
+0.8039215686275,0.0000000000000,0.0000000000000
+0.8235294117647,0.0000000000000,0.0000000000000
+0.8431372549020,0.0000000000000,0.0000000000000
+0.8627450980392,0.0000000000000,0.0000000000000
+0.8823529411765,0.0000000000000,0.0000000000000
+0.9019607843137,0.0000000000000,0.0000000000000
+0.9215686274510,0.0000000000000,0.0000000000000
+0.9411764705882,0.0000000000000,0.0000000000000
+0.9607843137255,0.0000000000000,0.0000000000000
+0.9803921568627,0.0000000000000,0.0000000000000
+1.0000000000000,0.9128630705394,241.0000000000000
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
+nan,nan,nan
diff --git a/tests/expected/evalp/ROCSS.csv b/tests/expected/evalp/ROCSS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ab68dea18a015816e96dbd71aca33f6bbbd64567
--- /dev/null
+++ b/tests/expected/evalp/ROCSS.csv
@@ -0,0 +1,4 @@
+0.7108499247114
+0.8017176997760
+0.7130661114003
+nan
diff --git a/tests/expected/evalp/WS.csv b/tests/expected/evalp/WS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82b07bb09227e378a17fa2a7a485f5371f36cccf
--- /dev/null
+++ b/tests/expected/evalp/WS.csv
@@ -0,0 +1 @@
+764.4471750114835,2578.1382636655953
diff --git a/tests/expected/evalp/WSS.csv b/tests/expected/evalp/WSS.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fd929dabd8f58354be587455dcbd4402d6a9f83d
--- /dev/null
+++ b/tests/expected/evalp/WSS.csv
@@ -0,0 +1 @@
+0.6621887740287,0.4360388849930
diff --git a/tests/test_determinist.py b/tests/test_determinist.py
index 4669b526c0088d83f1d40bc2bb8dd7314591b502..7757d64602e11f6fa189033107695102b9de9041 100644
--- a/tests/test_determinist.py
+++ b/tests/test_determinist.py
@@ -5,44 +5,50 @@ import evalhyd
 
 
 # load some predicted and observed streamflow
-_prd = numpy.genfromtxt("./data/q_prd.csv", delimiter=',')[:5, :]
+_prd = numpy.genfromtxt("./data/q_prd.csv", delimiter=',')[:, :]
 _obs = numpy.genfromtxt("./data/q_obs.csv", delimiter=',')[numpy.newaxis, :]
 
+_thr = numpy.repeat(
+    numpy.array([[690, 534, 445, numpy.nan]]), repeats=_prd.shape[0], axis=0
+)
+_events = "high"
+
+# list all available deterministic metrics
+_all_metrics = (
+    # errors-based
+    'MAE', 'MARE', 'MSE', 'RMSE',
+    # efficiencies-based
+    'NSE', 'KGE', 'KGE_D', 'KGEPRIME', 'KGEPRIME_D',
+    # contingency table-based
+    'CONT_TBL'
+)
+
+# list all available deterministic diagnostics
+_all_diags = (
+    'completeness'
+)
+
 
 class TestMetrics(unittest.TestCase):
 
     expected = {
-        'RMSE':
-            [[[777.03427238]],
-             [[776.87847854]],
-             [[777.80021654]],
-             [[778.15108180]],
-             [[778.61486998]]],
-        'NSE':
-            [[[0.71891219]],
-             [[0.71902490]],
-             [[0.71835777]],
-             [[0.71810361]],
-             [[0.71776748]]],
-        'KGE':
-            [[[0.74808767]],
-             [[0.74610620]],
-             [[0.74411103]],
-             [[0.74301085]],
-             [[0.74176777]]],
-        'KGEPRIME':
-            [[[0.81314075]],
-             [[0.81277485]],
-             [[0.81203242]],
-             [[0.81178671]],
-             [[0.81138658]]]
+        metric: (
+            numpy.genfromtxt(f"./expected/evald/{metric}.csv", delimiter=',')
+            [:, numpy.newaxis, numpy.newaxis]
+        ) for metric in _all_metrics
     }
+    # /!\ stacked-up thresholds in CSV file for CONT_TBL
+    #     because 5D metric so need to reshape array
+    expected['CONT_TBL'] = (
+        expected['CONT_TBL'].reshape(expected['NSE'].shape + (4, 4))
+    )
 
     def test_metrics_2d(self):
         for metric in self.expected.keys():
             with self.subTest(metric=metric):
                 numpy.testing.assert_almost_equal(
-                    evalhyd.evald(_obs, _prd, [metric])[0],
+                    evalhyd.evald(_obs, _prd, [metric],
+                                  q_thr=_thr, events=_events)[0],
                     self.expected[metric]
                 )
 
@@ -50,7 +56,8 @@ class TestMetrics(unittest.TestCase):
         for metric in self.expected.keys():
             with self.subTest(metric=metric):
                 numpy.testing.assert_almost_equal(
-                    evalhyd.evald(_obs[0], _prd[0], [metric])[0],
+                    evalhyd.evald(_obs[0], _prd[0], [metric],
+                                  q_thr=_thr[0], events=_events)[0],
                     [self.expected[metric][0]]
                 )
 
@@ -58,75 +65,127 @@ class TestMetrics(unittest.TestCase):
 class TestTransform(unittest.TestCase):
 
     def test_transform_sqrt(self):
-        numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], "sqrt")[0],
-            evalhyd.evald(_obs ** 0.5, _prd ** 0.5, ["NSE"])[0]
-        )
+        for metric in _all_metrics:
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evald(_obs, _prd, ["NSE"], transform="sqrt",
+                                  q_thr=_thr, events=_events)[0],
+                    evalhyd.evald(_obs ** 0.5, _prd ** 0.5, ["NSE"],
+                                  q_thr=_thr ** 0.5, events=_events)[0]
+                )
 
     def test_transform_inv(self):
         eps = 0.01 * numpy.mean(_obs)
-        numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], "inv")[0],
-            evalhyd.evald(1 / (_obs + eps), 1 / (_prd + eps), ["NSE"])[0]
-        )
+        for metric in _all_metrics:
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evald(_obs, _prd, ["NSE"], transform="inv",
+                                  q_thr=_thr, events=_events)[0],
+                    evalhyd.evald(1 / (_obs + eps), 1 / (_prd + eps), ["NSE"],
+                                  q_thr=1 / (_thr + eps), events=_events)[0]
+                )
 
     def test_transform_log(self):
         eps = 0.01 * numpy.mean(_obs)
-        numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], "log")[0],
-            evalhyd.evald(numpy.log(_obs + eps), numpy.log(_prd + eps),
-                          ["NSE"])[0]
-        )
+        for metric in _all_metrics:
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evald(_obs, _prd, ["NSE"], transform="log",
+                                  q_thr=_thr, events=_events)[0],
+                    evalhyd.evald(numpy.log(_obs + eps), numpy.log(_prd + eps),
+                                  ["NSE"],
+                                  q_thr=numpy.log(_thr + eps), events=_events)[0]
+                )
 
     def test_transform_pow(self):
-        numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], "pow", exponent=0.3)[0],
-            evalhyd.evald(_obs ** 0.3, _prd ** 0.3, ["NSE"])[0]
-        )
+        for metric in _all_metrics:
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evald(_obs, _prd, ["NSE"],
+                                  q_thr=_thr, events=_events,
+                                  transform="pow", exponent=0.3)[0],
+                    evalhyd.evald(_obs ** 0.3, _prd ** 0.3, ["NSE"],
+                                  q_thr=_thr ** 0.3, events=_events)[0]
+                )
 
 
 class TestMasking(unittest.TestCase):
 
     def test_masks(self):
-        msk = numpy.ones(_obs.shape, dtype=bool)
+        msk = numpy.ones(_prd.shape, dtype=bool)
+        msk = msk[:, numpy.newaxis, :]
         msk[..., :99] = False
+
+        # TODO: figure out why passing views would not work
+        obs = _obs[..., 99:].copy()
+        prd = _prd[..., 99:].copy()
+
         numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], t_msk=msk)[0],
-            evalhyd.evald(_obs[..., 99:], _prd[..., 99:], ["NSE"])[0]
+            evalhyd.evald(_obs, _prd, ["NSE"],
+                          q_thr=_thr, events=_events, t_msk=msk)[0],
+            evalhyd.evald(obs, prd, ["NSE"],
+                          q_thr=_thr, events=_events)[0]
         )
 
     def test_conditions(self):
-        with self.subTest(condtions="observed streamflow values"):
-            cdt = numpy.array(["q_obs{<2000,>3000}"], dtype='|S32')
+        with self.subTest(conditions="observed streamflow values"):
+            cdt = numpy.array([["q_obs{<2000,>3000}"]] * _prd.shape[0],
+                              dtype='|S32')
 
             msk = (_obs[0] < 2000) | (_obs[0] > 3000)
 
-            obs = _obs[..., msk]
-            prd = _prd[..., msk]
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., msk].copy()
+            prd = _prd[..., msk].copy()
 
             numpy.testing.assert_almost_equal(
-                evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0],
-                evalhyd.evald(obs, prd, ["NSE"])[0]
+                evalhyd.evald(_obs, _prd, ["NSE"],
+                              q_thr=_thr, events=_events, m_cdt=cdt)[0],
+                evalhyd.evald(obs, prd, ["NSE"],
+                              q_thr=_thr, events=_events)[0]
             )
 
-        with self.subTest(condtions="observed streamflow statistics"):
-            cdt = numpy.array(["q_obs{>=median}"], dtype='|S32')
+        with self.subTest(conditions="observed streamflow statistics"):
+            cdt = numpy.array([["q_obs{>=median}"]] * _prd.shape[0],
+                              dtype='|S32')
 
             msk = _obs[0] >= numpy.median(_obs)
 
-            obs = _obs[..., msk]
-            prd = _prd[..., msk]
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., msk].copy()
+            prd = _prd[..., msk].copy()
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evald(_obs, _prd, ["NSE"],
+                              q_thr=_thr, events=_events, m_cdt=cdt)[0],
+                evalhyd.evald(obs, prd, ["NSE"],
+                              q_thr=_thr, events=_events)[0]
+            )
+
+        with self.subTest(conditions="time indices"):
+            cdt = numpy.array([["t{20:311}"]] * (_prd.shape[0] - 4) +
+                              [["t{20:100,100:311}"],
+                               ["t{20,21,22,23,24:311}"],
+                               ["t{20,21,22,23:309,309,310}"],
+                               ["t{20:80,80,81,82,83:311}"]],
+                              dtype='|S32')
+
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., 20:].copy()
+            prd = _prd[..., 20:].copy()
 
             numpy.testing.assert_almost_equal(
-                evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0],
-                evalhyd.evald(obs, prd, ["NSE"])[0]
+                evalhyd.evald(_obs, _prd, ["NSE"],
+                              q_thr=_thr, events=_events, m_cdt=cdt)[0],
+                evalhyd.evald(obs, prd, ["NSE"],
+                              q_thr=_thr, events=_events)[0]
             )
 
 
 class TestMissingData(unittest.TestCase):
 
     def test_nan(self):
-        for metric in ('RMSE', 'NSE', 'KGE', 'KGEPRIME'):
+        for metric in _all_metrics:
             obs = numpy.array(
                 [[4.7, numpy.nan, 5.5, 2.7, 4.1]]
             )
@@ -135,9 +194,16 @@ class TestMissingData(unittest.TestCase):
                  [numpy.nan, 4.2, 4.7, 4.3, 3.3],
                  [5.3, 5.2, 5.7, numpy.nan, 3.9]]
             )
+            thr = numpy.array(
+                [[4., 5.],
+                 [4., 5.],
+                 [4., 5.]]
+            )
+            events = "low"
 
             with self.subTest(metric=metric):
-                res = evalhyd.evald(obs, prd, [metric])[0]
+                res = evalhyd.evald(obs, prd, [metric],
+                                    q_thr=thr, events=events)[0]
 
                 for i in range(prd.shape[0]):
                     msk = ~numpy.isnan(obs[0]) & ~numpy.isnan(prd[i])
@@ -149,7 +215,8 @@ class TestMissingData(unittest.TestCase):
                         evalhyd.evald(
                             obs[:, msk],
                             prd[i, msk][numpy.newaxis],
-                            [metric]
+                            [metric],
+                            q_thr=thr[i, :][numpy.newaxis], events=events
                         )[0]
                     )
 
@@ -170,24 +237,72 @@ class TestUncertainty(unittest.TestCase):
         obs_3yrs = numpy.hstack((obs_1yr,) * 3)
         prd_3yrs = numpy.hstack((prd_1yr,) * 3)
 
-        for metric in ('RMSE', 'NSE', 'KGE', 'KGEPRIME'):
+        thr = numpy.repeat(
+            numpy.array([[690, 534, 445, numpy.nan]]),
+            repeats=prd_1yr.shape[0], axis=0
+        )
+        events = "low"
+
+        for metric in _all_metrics:
             with self.subTest(metric=metric):
                 numpy.testing.assert_almost_equal(
                     # bootstrap with only one year of data
                     # (compare last sample only to have matching dimensions)
                     evalhyd.evald(
                         obs_1yr, prd_1yr, [metric],
+                        q_thr=thr,
+                        events=events,
                         bootstrap={
                             "n_samples": 10, "len_sample": 3, "summary": 0
                         },
                         dts=dts_1yr
-                    )[0][..., [0]],
+                    )[0][:, :, [0]],
                     # repeat year of data three times to correspond to a
                     # bootstrap sample of length 3
-                    evalhyd.evald(obs_3yrs, prd_3yrs, [metric])[0]
+                    evalhyd.evald(obs_3yrs, prd_3yrs, [metric],
+                                  q_thr=thr, events=events)[0]
                 )
 
 
+class TestDiagnostics(unittest.TestCase):
+
+    def test_completeness(self):
+        obs = numpy.array(
+            [[4.7, 4.3, numpy.nan, 2.7, 4.1, 5.0]]
+        )
+
+        prd = numpy.array(
+            [[5.3, numpy.nan, 5.7, 2.3, 3.3, 4.1],
+             [4.3, 4.2, 4.7, 4.3, 3.3, 2.8],
+             [5.3, numpy.nan, 5.7, 2.3, 3.8, numpy.nan]]
+        )
+
+        msk = numpy.array(
+            [[[True, True, True, False, True, True],
+              [True, True, True, True, True, True]],
+             [[True, True, True, True, True, False],
+              [True, True, True, True, True, True]],
+             [[True, True, True, False, False, True],
+              [True, True, True, True, True, True]]]
+        )
+
+        exp = numpy.array(
+            [[[3.],
+              [4.]],
+             [[4.],
+              [5.]],
+             [[1.],
+              [3.]]]
+        )
+
+        numpy.testing.assert_almost_equal(
+            exp,
+            evalhyd.evald(
+                obs, prd, ["NSE"], t_msk=msk, diagnostics=["completeness"]
+            )[1]
+        )
+
+
 if __name__ == '__main__':
     test_loader = unittest.TestLoader()
     test_suite = unittest.TestSuite()
diff --git a/tests/test_probabilist.py b/tests/test_probabilist.py
index 3d458ef6a9eceff66c3b35918d62b6224087e8db..9c9b945d730b554cb3e7b73d6e0f503327436515 100644
--- a/tests/test_probabilist.py
+++ b/tests/test_probabilist.py
@@ -7,48 +7,109 @@ import evalhyd
 # load some predicted and observed streamflow
 _prd = (
     numpy.genfromtxt("./data/q_prd.csv", delimiter=',')
-    [:5, :][numpy.newaxis, numpy.newaxis, ...]
+    [numpy.newaxis, numpy.newaxis, ...]
 )
 _obs = numpy.genfromtxt("./data/q_obs.csv", delimiter=',')[numpy.newaxis, :]
 
+# list all available probabilistic metrics
+_all_metrics = (
+    # threshold-based
+    'BS', 'BSS', 'BS_CRD', 'BS_LBD', 'REL_DIAG', 'CRPS_FROM_BS',
+    # CDF-based
+    'CRPS_FROM_ECDF',
+    # quantile-based
+    'QS', 'CRPS_FROM_QS',
+    # contingency table-based
+    'POD', 'POFD', 'FAR', 'CSI', 'ROCSS',
+    # ranks-based
+    'RANK_HIST', 'DS', 'AS',
+    # intervals
+    'CR', 'AW', 'AWN', 'AWI', 'WS', 'WSS',
+    # multivariate
+    'ES'
+)
+
+# list all available deterministic diagnostics
+_all_diags = (
+    'completeness'
+)
+
 
 class TestMetrics(unittest.TestCase):
 
     expected_thr = {
-        'BS':
-            [[[[[0.1081672, 0.073954980, 0.08681672, numpy.nan]]]]],
-        'BSS':
-            [[[[[0.56240422, 0.66612211, 0.56288391, numpy.nan]]]]],
-        'BS_CRD':
-            [[[[[[0.01335634, 0.15237434, 0.24718520],
-                 [0.00550861, 0.15305671, 0.22150309],
-                 [0.00753750, 0.11933328, 0.19861250],
-                 [numpy.nan, numpy.nan, numpy.nan]]]]]],
-        'BS_LBD':
-            [[[[[[0.01244569, 0.14933386, 0.24505537],
-                 [0.00801337, 0.14745568, 0.21339730],
-                 [0.01719462, 0.10479711, 0.17441921],
-                 [numpy.nan, numpy.nan, numpy.nan]]]]]]
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('BS', 'BSS', 'BS_CRD', 'BS_LBD', 'REL_DIAG', 'CRPS_FROM_BS')
+    }
+    # /!\ stacked-up thresholds in CSV file for REL_DIAG
+    #     because 7D metric so need to reshape array
+    expected_thr['REL_DIAG'] = (
+        expected_thr['REL_DIAG'].reshape(expected_thr['BS'].shape
+                                         + (_prd.shape[2] + 1, 3))
+    )
+
+    expected_cdf = {
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('CRPS_FROM_ECDF',)
     }
 
     expected_qtl = {
-        'QS':
-            [[[[[321.1607717,  294.3494105,  265.70418006,
-                 236.15648446, 206.03965702]]]]],
-        'CRPS':
-            [[[[176.63504823]]]]
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('QS', 'CRPS_FROM_QS')
+    }
+
+    expected_ct = {
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('POD', 'POFD', 'FAR', 'CSI', 'ROCSS')
+    }
+
+    expected_rk = {
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('RANK_HIST', 'DS', 'AS')
+    }
+
+    expected_itv = {
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('CR', 'AW', 'AWN', 'AWI', 'WS', 'WSS')
+    }
+
+    expected_mvr = {
+        metric: (
+            numpy.genfromtxt(f"./expected/evalp/{metric}.csv", delimiter=',')
+            [numpy.newaxis, numpy.newaxis, numpy.newaxis, numpy.newaxis, ...]
+        ) for metric in ('ES',)
     }
 
-    def test_threshold_metrics(self):
+    def test_thresholds_metrics(self):
         thr = numpy.array([[690, 534, 445, numpy.nan]])
         for metric in self.expected_thr.keys():
             with self.subTest(metric=metric):
                 numpy.testing.assert_almost_equal(
-                    evalhyd.evalp(_obs, _prd, [metric], thr)[0],
+                    evalhyd.evalp(_obs, _prd, [metric], thr, "high")[0],
                     self.expected_thr[metric]
                 )
 
-    def test_quantile_metrics(self):
+    def test_cdf_metrics(self):
+        for metric in self.expected_cdf.keys():
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evalp(_obs, _prd, [metric])[0],
+                    self.expected_cdf[metric]
+                )
+
+    def test_quantiles_metrics(self):
         for metric in self.expected_qtl.keys():
             with self.subTest(metric=metric):
                 numpy.testing.assert_almost_equal(
@@ -56,21 +117,65 @@ class TestMetrics(unittest.TestCase):
                     self.expected_qtl[metric]
                 )
 
+    def test_contingency_table_metrics(self):
+        for metric in self.expected_ct.keys():
+            thr = numpy.array([[690, 534, 445, numpy.nan]])
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evalp(_obs, _prd, [metric], thr, "low")[0],
+                    self.expected_ct[metric]
+                )
+
+    def test_ranks_metrics(self):
+        for metric in self.expected_rk.keys():
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evalp(_obs, _prd, [metric], seed=7)[0],
+                    self.expected_rk[metric]
+                )
+
+    def test_intervals_metrics(self):
+        lvl = numpy.array([30., 80.])
+        for metric in self.expected_itv.keys():
+
+            numpy.set_printoptions(precision=13)
+            m = evalhyd.evalp(_obs, _prd, [metric], c_lvl=lvl)[0][0, 0, 0]
+            numpy.savetxt(f"./expected/evalp/{metric}.csv", m, delimiter=',', fmt="%.13f")
+
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evalp(_obs, _prd, [metric], c_lvl=lvl)[0],
+                    self.expected_itv[metric]
+                )
+
+    def test_multivariate_metrics(self):
+        n_sit = 5
+
+        multi_obs = numpy.repeat(_obs, repeats=n_sit, axis=0)
+        multi_prd = numpy.repeat(_prd, repeats=n_sit, axis=0)
+
+        for metric in self.expected_mvr.keys():
+            with self.subTest(metric=metric):
+                numpy.testing.assert_almost_equal(
+                    evalhyd.evalp(multi_obs, multi_prd, [metric], seed=7)[0],
+                    self.expected_mvr[metric]
+                )
+
 
 class TestDecomposition(unittest.TestCase):
 
     def test_brier_calibration_refinement(self):
         thr = numpy.array([[690, 534, 445]])
-        bs, = evalhyd.evalp(_obs, _prd, ["BS"], thr)
-        bs_crd, = evalhyd.evalp(_obs, _prd, ["BS_CRD"], thr)
+        bs, = evalhyd.evalp(_obs, _prd, ["BS"], thr, "high")
+        bs_crd, = evalhyd.evalp(_obs, _prd, ["BS_CRD"], thr, "high")
         numpy.testing.assert_almost_equal(
             bs, bs_crd[..., 0] - bs_crd[..., 1] + bs_crd[..., 2]
         )
 
     def test_brier_likelihood_base_rate(self):
         thr = numpy.array([[690, 534, 445]])
-        bs, = evalhyd.evalp(_obs, _prd, ["BS"], thr)
-        bs_lbd, = evalhyd.evalp(_obs, _prd, ["BS_LBD"], thr)
+        bs, = evalhyd.evalp(_obs, _prd, ["BS"], thr, "high")
+        bs_lbd, = evalhyd.evalp(_obs, _prd, ["BS_LBD"], thr, "high")
         numpy.testing.assert_almost_equal(
             bs, bs_lbd[..., 0] - bs_lbd[..., 1] + bs_lbd[..., 2]
         )
@@ -79,35 +184,56 @@ class TestDecomposition(unittest.TestCase):
 class TestMasking(unittest.TestCase):
 
     def test_masks(self):
-        msk = numpy.ones((_prd.shape[0], _prd.shape[1], 1, _prd.shape[3]), dtype=bool)
+        msk = numpy.ones((_prd.shape[0], _prd.shape[1], 1, _prd.shape[3]),
+                         dtype=bool)
         msk[..., :99] = False
+
+        # TODO: figure out why passing views would not work
+        obs = _obs[..., 99:].copy()
+        prd = _prd[..., 99:].copy()
+
         numpy.testing.assert_almost_equal(
             evalhyd.evalp(_obs, _prd, ["QS"], t_msk=msk)[0],
-            evalhyd.evalp(_obs[..., 99:], _prd[..., 99:], ["QS"])[0]
+            evalhyd.evalp(obs, prd, ["QS"])[0]
         )
 
     def test_conditions(self):
-        with self.subTest(condtions="observed streamflow values"):
-            cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32')
+        with self.subTest(conditions="observed streamflow values"):
+            cdt = numpy.array([["q_obs{<2000,>3000}"]])
 
             msk = (_obs[0] < 2000) | (_obs[0] > 3000)
 
-            obs = _obs[..., msk]
-            prd = _prd[..., msk]
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., msk].copy()
+            prd = _prd[..., msk].copy()
 
             numpy.testing.assert_almost_equal(
                 evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
                 evalhyd.evalp(obs, prd, ["QS"])[0]
             )
 
-        with self.subTest(condtions="predicted streamflow statistics"):
+        with self.subTest(conditions="predicted streamflow statistics"):
             cdt = numpy.array([["q_prd_median{<=quantile0.7}"]], dtype='|S32')
 
             median = numpy.squeeze(numpy.median(_prd, 2))
             msk = median <= numpy.quantile(median, 0.7)
 
-            obs = _obs[..., msk]
-            prd = _prd[..., msk]
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., msk].copy()
+            prd = _prd[..., msk].copy()
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
+                evalhyd.evalp(obs, prd, ["QS"])[0]
+            )
+
+        with self.subTest(conditions="time indices"):
+            cdt = numpy.array([["t{20:80,80,81,82,83:311}"]],
+                              dtype='|S32')
+
+            # TODO: figure out why passing views would not work
+            obs = _obs[..., 20:].copy()
+            prd = _prd[..., 20:].copy()
 
             numpy.testing.assert_almost_equal(
                 evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
@@ -119,26 +245,35 @@ class TestMissingData(unittest.TestCase):
 
     def test_nan(self):
         thr = numpy.array([[690, 534, 445, numpy.nan]])
-        for metric in ("BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"):
+        for metric in _all_metrics:
+            # skip ranks-based metrics because they contain a random element
+            if metric in ("RANK_HIST", "DS", "AS"):
+                continue
+
             with self.subTest(metric=metric):
+                lvl = numpy.array([30., 80.])
                 numpy.testing.assert_almost_equal(
                     # missing data flagged as NaN
                     evalhyd.evalp(
-                        [[4.7, numpy.nan, 5.5, 2.7, 4.1]],
-                        [[[[5.3, 4.2, 5.7, 2.3, numpy.nan],
-                           [4.3, 4.2, 4.7, 4.3, numpy.nan],
-                           [5.3, 5.2, 5.7, 2.3, numpy.nan]]]],
+                        numpy.array([[4.7, numpy.nan, 5.5, 2.7, 4.1]]),
+                        numpy.array([[[[5.3, 4.2, 5.7, 2.3, numpy.nan],
+                                       [4.3, 4.2, 4.7, 4.3, numpy.nan],
+                                       [5.3, 5.2, 5.7, 2.3, numpy.nan]]]]),
                         [metric],
-                        thr
+                        thr,
+                        "high",
+                        lvl
                     )[0],
                     # missing data pairwise deleted from series
                     evalhyd.evalp(
-                        [[4.7, 5.5, 2.7]],
-                        [[[[5.3, 5.7, 2.3],
-                           [4.3, 4.7, 4.3],
-                           [5.3, 5.7, 2.3]]]],
+                        numpy.array([[4.7, 5.5, 2.7]]),
+                        numpy.array([[[[5.3, 5.7, 2.3],
+                                       [4.3, 4.7, 4.3],
+                                       [5.3, 5.7, 2.3]]]]),
                         [metric],
-                        thr
+                        thr,
+                        "high",
+                        lvl
                     )[0]
                 )
 
@@ -161,8 +296,13 @@ class TestUncertainty(unittest.TestCase):
         obs_3yrs = numpy.hstack((obs_1yr,) * 3)
         prd_3yrs = numpy.hstack((prd_1yr,) * 3)
 
-        for metric in ("BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"):
+        for metric in _all_metrics:
+            # skip ranks-based metrics because they contain a random element
+            if metric in ("RANK_HIST", "DS", "AS"):
+                continue
+
             with self.subTest(metric=metric):
+                lvl = numpy.array([30., 80.])
                 numpy.testing.assert_almost_equal(
                     # bootstrap with only one year of data
                     # (compare last sample only to have matching dimensions)
@@ -171,10 +311,12 @@ class TestUncertainty(unittest.TestCase):
                         prd_1yr[numpy.newaxis, numpy.newaxis],
                         [metric],
                         q_thr=thr,
+                        events="high",
                         bootstrap={
                             "n_samples": 10, "len_sample": 3, "summary": 0
                         },
-                        dts=dts_1yr
+                        dts=dts_1yr,
+                        c_lvl=lvl
                     )[0][:, :, :, [0]],
                     # repeat year of data three times to correspond to a
                     # bootstrap sample of length 3
@@ -182,11 +324,178 @@ class TestUncertainty(unittest.TestCase):
                         obs_3yrs[numpy.newaxis],
                         prd_3yrs[numpy.newaxis, numpy.newaxis],
                         [metric],
-                        q_thr=thr
+                        q_thr=thr,
+                        events="high",
+                        c_lvl=lvl
                     )[0]
                 )
 
 
+class TestMultiDimensional(unittest.TestCase):
+
+    thr = numpy.array([[690, 534, 445, numpy.nan]])
+    events = "high"
+    lvl = numpy.array([30., 80.])
+    seed = 7
+
+    # skip ranks-based metrics because they contain a random element
+    metrics = [m for m in _all_metrics if m not in ("RANK_HIST", "DS", "AS")]
+
+    def test_multi_sites(self):
+        n_sit = 3
+        multi_obs = numpy.repeat(_obs, repeats=n_sit, axis=0)
+        multi_prd = numpy.repeat(_prd, repeats=n_sit, axis=0)
+        multi_thr = numpy.repeat(self.thr, repeats=n_sit, axis=0)
+
+        # skip multisite metrics because their result is not the sum of sites
+        metrics = [m for m in self.metrics if m not in ("ES",)]
+
+        multi = evalhyd.evalp(
+            multi_obs,
+            multi_prd,
+            metrics,
+            q_thr=multi_thr,
+            events=self.events,
+            c_lvl=self.lvl,
+            seed=self.seed
+        )
+
+        mono = evalhyd.evalp(
+            _obs,
+            _prd,
+            metrics,
+            q_thr=self.thr,
+            events=self.events,
+            c_lvl=self.lvl,
+            seed=self.seed
+        )
+
+        for m, metric in enumerate(metrics):
+            for site in range(n_sit):
+                with self.subTest(metric=metric, site=site):
+                    numpy.testing.assert_almost_equal(
+                        multi[m][[site]], mono[m]
+                    )
+
+    def test_multi_leadtimes(self):
+        n_ldt = 7
+        multi_prd = numpy.repeat(_prd, repeats=n_ldt, axis=1)
+
+        multi = evalhyd.evalp(
+            _obs,
+            multi_prd,
+            self.metrics,
+            q_thr=self.thr,
+            events=self.events,
+            c_lvl=self.lvl,
+            seed=self.seed
+        )
+
+        mono = evalhyd.evalp(
+            _obs,
+            _prd,
+            self.metrics,
+            q_thr=self.thr,
+            events=self.events,
+            c_lvl=self.lvl,
+            seed=self.seed
+        )
+
+        for m, metric in enumerate(self.metrics):
+            for leadtime in range(n_ldt):
+                with self.subTest(metric=metric, leadtime=leadtime):
+                    numpy.testing.assert_almost_equal(
+                        multi[m][:, [leadtime]], mono[m]
+                    )
+
+    def test_multi_sites_multi_leadtimes(self):
+        n_sit = 3
+        n_ldt = 7
+
+        multi_obs = numpy.repeat(_obs, repeats=n_sit, axis=0)
+        multi_obs += numpy.random.randint(
+            low=0, high=200, size=(n_sit, multi_obs.shape[1])
+        )
+
+        multi_prd = numpy.repeat(_prd, repeats=n_sit, axis=0)
+        multi_prd = numpy.repeat(multi_prd, repeats=n_ldt, axis=1)
+        multi_prd += numpy.random.randint(
+            low=0, high=200, size=(n_sit, n_ldt, *multi_prd.shape[2:])
+        )
+
+        multi_thr = numpy.repeat(self.thr, repeats=n_sit, axis=0)
+
+        # skip multisite metrics because their result is not the sum of sites
+        metrics = [m for m in self.metrics if m not in ("ES",)]
+
+        multi = evalhyd.evalp(
+            multi_obs,
+            multi_prd,
+            metrics,
+            q_thr=multi_thr,
+            events=self.events,
+            c_lvl=self.lvl,
+            seed=self.seed
+        )
+
+        for m, metric in enumerate(metrics):
+            for sit in range(n_sit):
+                for ldt in range(n_ldt):
+
+                    mono = evalhyd.evalp(
+                        multi_obs[[sit]],
+                        multi_prd[[sit]][:, [ldt]],
+                        [metric],
+                        q_thr=self.thr,
+                        events=self.events,
+                        c_lvl=self.lvl,
+                        seed=self.seed
+                    )
+
+                    with self.subTest(metric=metric, site=sit, leadtime=ldt):
+                        numpy.testing.assert_almost_equal(
+                            multi[m][sit, ldt], mono[0][0, 0]
+                        )
+
+
+class TestDiagnostics(unittest.TestCase):
+
+    def test_completeness(self):
+        obs = numpy.array(
+            [[4.7, 4.3, numpy.nan, 2.7, 4.1, 5.0]]
+        )
+
+        prd = numpy.array(
+            [[[[5.3, numpy.nan, 5.7, 2.3, 3.3, numpy.nan],
+               [4.3, numpy.nan, 4.7, 4.3, 3.4, numpy.nan],
+               [5.3, numpy.nan, 5.7, 2.3, 3.8, numpy.nan]],
+              [[numpy.nan, 4.2, 5.7, 2.3, 3.1, 4.1],
+               [numpy.nan, 4.2, 4.7, 4.3, 3.3, 2.8],
+               [numpy.nan, 5.2, 5.7, 2.3, 3.9, 3.5]]]]
+        )
+
+        msk = numpy.array(
+            [[[[True, True, True, False, True, True],
+               [True, True, True, True, True, True]],
+              [[True, True, True, True, True, False],
+               [True, True, True, True, True, True]]]]
+        )
+
+        exp = numpy.array(
+            [[[[2.],
+               [3.]],
+              [[3.],
+               [4.]]]]
+        )
+
+        numpy.testing.assert_almost_equal(
+            exp,
+            evalhyd.evalp(
+                obs, prd, ["QS"], t_msk=msk, diagnostics=["completeness"]
+            )[1]
+        )
+
+
 if __name__ == '__main__':
     test_loader = unittest.TestLoader()
     test_suite = unittest.TestSuite()