diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b6d25f1ae05d8da040d4d6bbfa1c1569a6d9ac8f..339e069c2c70c642243545bea79c3ba0c3cc62fe 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,11 +3,11 @@ image: mambaorg/micromamba stages: - build_and_test -build: +build-and-test: stage: build_and_test script: # install dependencies - - micromamba install --yes --file environment-dev.yml + - micromamba install --yes --file environment.yml # configure evalhyd - cmake -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_PREFIX_PATH="$CONDA_PREFIX" # compile evalhyd diff --git a/CMakeLists.txt b/CMakeLists.txt index 049111a0050fff22a616b9a0abcc172e95a02797..d72693c41608308a31302e9798d101318379cb23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,13 @@ +# Copyright (c) 2023, INRAE. +# Distributed under the terms of the GPL-3 Licence. +# The full licence is in the file LICENCE, distributed with this software. + cmake_minimum_required(VERSION 3.15) project( EvalHyd LANGUAGES CXX - VERSION 0.0.1 + VERSION 0.1.0 DESCRIPTION "Utility to evaluate streamflow predictions" ) @@ -11,7 +15,10 @@ project( # dependencies # ------------------------------------------------------------------------------ -find_package(xtensor REQUIRED) +find_package(xtl 0.7.5 REQUIRED) +message(STATUS "Found xtl: ${xtl_INCLUDE_DIRS}/xtl") + +find_package(xtensor 0.24.6 REQUIRED) message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor") # ------------------------------------------------------------------------------ @@ -21,11 +28,7 @@ message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor") # define evalhyd library add_library( evalhyd - src/determinist/evald.cpp - src/probabilist/evalp.cpp - src/probabilist/evaluator_brier.cpp - src/probabilist/evaluator_elements.cpp - src/probabilist/evaluator_quantiles.cpp + INTERFACE ) add_library(EvalHyd::evalhyd ALIAS evalhyd) @@ -38,30 +41,28 @@ set_target_properties( target_include_directories( evalhyd - PUBLIC + INTERFACE 
$<INSTALL_INTERFACE:include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src ) target_link_libraries( evalhyd - PUBLIC + INTERFACE xtensor ) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_options( evalhyd - PRIVATE - "/bigobj" + INTERFACE + "/bigobj" ) endif() target_compile_features( evalhyd - PUBLIC + INTERFACE cxx_std_14 ) diff --git a/LICENCE.rst b/LICENCE.rst new file mode 100644 index 0000000000000000000000000000000000000000..23f589cb52912b4a70adac8992cf11dbb4054caa --- /dev/null +++ b/LICENCE.rst @@ -0,0 +1,619 @@ +GNU General Public License + +Version 3, 29 June 2007 +Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/> + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble +-------- + +The GNU General Public License is a free, copyleft license for software and other +kinds of works. + +The licenses for most software and other practical works are designed to take away +your freedom to share and change the works. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change all versions of a +program--to make sure it remains free software for all its users. We, the Free +Software Foundation, use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General +Public Licenses are designed to make sure that you have the freedom to distribute +copies of free software (and charge for them if you wish), that you receive source +code or can get it if you want it, that you can change the software or use pieces of +it in new free programs, and that you know you can do these things. 
+ +To protect your rights, we need to prevent others from denying you these rights or +asking you to surrender the rights. Therefore, you have certain responsibilities if +you distribute copies of the software, or if you modify it: responsibilities to +respect the freedom of others. + +For example, if you distribute copies of such a program, whether gratis or for a fee, +you must pass on to the recipients the same freedoms that you received. You must make +sure that they, too, receive or can get the source code. And you must show them these +terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: (1) assert +copyright on the software, and (2) offer you this License giving you legal permission +to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains that there is +no warranty for this free software. For both users' and authors' sake, the GPL +requires that modified versions be marked as changed, so that their problems will not +be attributed erroneously to authors of previous versions. + +Some devices are designed to deny users access to install or run modified versions of +the software inside them, although the manufacturer can do so. This is fundamentally +incompatible with the aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we have designed +this version of the GPL to prohibit the practice for those products. If such problems +arise substantially in other domains, we stand ready to extend this provision to +those domains in future versions of the GPL, as needed to protect the freedom of +users. + +Finally, every program is threatened constantly by software patents. 
States should +not allow patents to restrict development and use of software on general-purpose +computers, but in those that do, we wish to avoid the special danger that patents +applied to a free program could make it effectively proprietary. To prevent this, the +GPL assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and modification follow. + +Terms and Conditions +-------------------- + +0. Definitions +^^^^^^^^^^^^^^ + +“This License” refers to version 3 of the GNU General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work in +a fashion requiring copyright permission, other than the making of an exact copy. The +resulting work is called a “modified version” of the earlier work or a +work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based on +the Program. + +To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a private +copy. Propagation includes copying, distribution (with or without modification), +making available to the public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. 
+ +An interactive user interface displays “Appropriate Legal Notices” to the +extent that it includes a convenient and prominently visible feature that (1) +displays an appropriate copyright notice, and (2) tells the user that there is no +warranty for the work (except to the extent that warranties are provided), that +licensees may convey the work under this License, and how to view a copy of this +License. If the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +1. Source Code +^^^^^^^^^^^^^^ + +The “source code” for a work means the preferred form of the work for +making modifications to it. “Object code” means any non-source form of a +work. + +A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used among +developers working in that language. + +The “System Libraries” of an executable work include anything, other than +the work as a whole, that (a) is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and (b) serves only to +enable use of the work with that Major Component, or to implement a Standard +Interface for which an implementation is available to the public in source code form. +A “Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system (if any) on which +the executable work runs, or a compiler used to produce the work, or an object code +interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the object +code and to modify the work, including scripts to control those activities. 
However, +it does not include the work's System Libraries, or general-purpose tools or +generally available free programs which are used unmodified in performing those +activities but which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for the work, and +the source code for shared libraries and dynamically linked subprograms that the work +is specifically designed to require, such as by intimate data communication or +control flow between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +2. Basic Permissions +^^^^^^^^^^^^^^^^^^^^ + +All rights granted under this License are granted for the term of copyright on the +Program, and are irrevocable provided the stated conditions are met. This License +explicitly affirms your unlimited permission to run the unmodified Program. The +output from running a covered work is covered by this License only if the output, +given its content, constitutes a covered work. This License acknowledges your rights +of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey covered +works to others for the sole purpose of having them make modifications exclusively +for you, or provide you with facilities for running those works, provided that you +comply with the terms of this License in conveying all material for which you do not +control copyright. 
Those thus making or running the covered works for you must do so +exclusively on your behalf, under your direction and control, on terms that prohibit +them from making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the conditions +stated below. Sublicensing is not allowed; section 10 makes it unnecessary. + +3. Protecting Users' Legal Rights From Anti-Circumvention Law +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +No covered work shall be deemed part of an effective technological measure under any +applicable law fulfilling obligations under article 11 of the WIPO copyright treaty +adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention +of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of +technological measures to the extent such circumvention is effected by exercising +rights under this License with respect to the covered work, and you disclaim any +intention to limit operation or modification of the work as a means of enforcing, +against the work's users, your or third parties' legal rights to forbid circumvention +of technological measures. + +4. Conveying Verbatim Copies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may convey verbatim copies of the Program's source code as you receive it, in any +medium, provided that you conspicuously and appropriately publish on each copy an +appropriate copyright notice; keep intact all notices stating that this License and +any non-permissive terms added in accord with section 7 apply to the code; keep +intact all notices of the absence of any warranty; and give all recipients a copy of +this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer +support or warranty protection for a fee. + +5. 
Conveying Modified Source Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may convey a work based on the Program, or the modifications to produce it from +the Program, in the form of source code under the terms of section 4, provided that +you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified it, and giving a + relevant date. + b) The work must carry prominent notices stating that it is released under this + License and any conditions added under section 7. This requirement modifies the + requirement in section 4 to “keep intact all notices”. + c) You must license the entire work, as a whole, under this License to anyone who + comes into possession of a copy. This License will therefore apply, along with any + applicable section 7 additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no permission to license the + work in any other way, but it does not invalidate such permission if you have + separately received it. + d) If the work has interactive user interfaces, each must display Appropriate Legal + Notices; however, if the Program has interactive interfaces that do not display + Appropriate Legal Notices, your work need not make them do so. + +A compilation of a covered work with other separate and independent works, which are +not by their nature extensions of the covered work, and which are not combined with +it such as to form a larger program, in or on a volume of a storage or distribution +medium, is called an “aggregate” if the compilation and its resulting +copyright are not used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work in an aggregate +does not cause this License to apply to the other parts of the aggregate. + +6. 
Conveying Non-Source Forms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may convey a covered work in object code form under the terms of sections 4 and +5, provided that you also convey the machine-readable Corresponding Source under the +terms of this License, in one of these ways: + + a) Convey the object code in, or embodied in, a physical product (including a + physical distribution medium), accompanied by the Corresponding Source fixed on a + durable physical medium customarily used for software interchange. + b) Convey the object code in, or embodied in, a physical product (including a + physical distribution medium), accompanied by a written offer, valid for at least + three years and valid for as long as you offer spare parts or customer support for + that product model, to give anyone who possesses the object code either (1) a copy of + the Corresponding Source for all the software in the product that is covered by this + License, on a durable physical medium customarily used for software interchange, for + a price no more than your reasonable cost of physically performing this conveying of + source, or (2) access to copy the Corresponding Source from a network server at no + charge. + c) Convey individual copies of the object code with a copy of the written offer to + provide the Corresponding Source. This alternative is allowed only occasionally and + noncommercially, and only if you received the object code with such an offer, in + accord with subsection 6b. + d) Convey the object code by offering access from a designated place (gratis or for + a charge), and offer equivalent access to the Corresponding Source in the same way + through the same place at no further charge. You need not require recipients to copy + the Corresponding Source along with the object code. 
If the place to copy the object + code is a network server, the Corresponding Source may be on a different server + (operated by you or a third party) that supports equivalent copying facilities, + provided you maintain clear directions next to the object code saying where to find + the Corresponding Source. Regardless of what server hosts the Corresponding Source, + you remain obligated to ensure that it is available for as long as needed to satisfy + these requirements. + e) Convey the object code using peer-to-peer transmission, provided you inform + other peers where the object code and Corresponding Source of the work are being + offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the +Corresponding Source as a System Library, need not be included in conveying the +object code work. + +A “User Product” is either (1) a “consumer product”, which +means any tangible personal property which is normally used for personal, family, or +household purposes, or (2) anything designed or sold for incorporation into a +dwelling. In determining whether a product is a consumer product, doubtful cases +shall be resolved in favor of coverage. For a particular product received by a +particular user, “normally used” refers to a typical or common use of +that class of product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected to use, the +product. A product is a consumer product regardless of whether the product has +substantial commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified version of +its Corresponding Source. 
The information must suffice to ensure that the continued +functioning of the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for +use in, a User Product, and the conveying occurs as part of a transaction in which +the right of possession and use of the User Product is transferred to the recipient +in perpetuity or for a fixed term (regardless of how the transaction is +characterized), the Corresponding Source conveyed under this section must be +accompanied by the Installation Information. But this requirement does not apply if +neither you nor any third party retains the ability to install modified object code +on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to +continue to provide support service, warranty, or updates for a work that has been +modified or installed by the recipient, or for the User Product in which it has been +modified or installed. Access to a network may be denied when the modification itself +materially and adversely affects the operation of the network or violates the rules +and protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in accord with +this section must be in a format that is publicly documented (and with an +implementation available to the public in source code form), and must require no +special password or key for unpacking, reading or copying. + +7. Additional Terms +^^^^^^^^^^^^^^^^^^^ + +“Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as though they +were included in this License, to the extent that they are valid under applicable +law. 
If additional permissions apply only to part of the Program, that part may be +used separately under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when you +modify the work.) You may place additional permissions on material, added by you to a +covered work, for which you have or can give appropriate copyright permission. + +Notwithstanding any other provision of this License, for material you add to a +covered work, you may (if authorized by the copyright holders of that material) +supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the terms of + sections 15 and 16 of this License; or + b) Requiring preservation of specified reasonable legal notices or author + attributions in that material or in the Appropriate Legal Notices displayed by works + containing it; or + c) Prohibiting misrepresentation of the origin of that material, or requiring that + modified versions of such material be marked in reasonable ways as different from the + original version; or + d) Limiting the use for publicity purposes of names of licensors or authors of the + material; or + e) Declining to grant rights under trademark law for use of some trade names, + trademarks, or service marks; or + f) Requiring indemnification of licensors and authors of that material by anyone + who conveys the material (or modified versions of it) with contractual assumptions of + liability to the recipient, for any liability that these contractual assumptions + directly impose on those licensors and authors. + +All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. 
If the Program as you received +it, or any part of it, contains a notice stating that it is governed by this License +along with a term that is a further restriction, you may remove that term. If a +license document contains a further restriction but permits relicensing or conveying +under this License, you may add to a covered work material governed by the terms of +that license document, provided that the further restriction does not survive such +relicensing or conveying. + +If you add terms to a covered work in accord with this section, you must place, in +the relevant source files, a statement of the additional terms that apply to those +files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a +separately written license, or stated as exceptions; the above requirements apply +either way. + +8. Termination +^^^^^^^^^^^^^^ + +You may not propagate or modify a covered work except as expressly provided under +this License. Any attempt otherwise to propagate or modify it is void, and will +automatically terminate your rights under this License (including any patent licenses +granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated (a) provisionally, unless and until the +copyright holder explicitly and finally terminates your license, and (b) permanently, +if the copyright holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently +if the copyright holder notifies you of the violation by some reasonable means, this +is the first time you have received notice of violation of this License (for any +work) from that copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the licenses of +parties who have received copies or rights from you under this License. If your +rights have been terminated and not permanently reinstated, you do not qualify to +receive new licenses for the same material under section 10. + +9. Acceptance Not Required for Having Copies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You are not required to accept this License in order to receive or run a copy of the +Program. Ancillary propagation of a covered work occurring solely as a consequence of +using peer-to-peer transmission to receive a copy likewise does not require +acceptance. However, nothing other than this License grants you permission to +propagate or modify any covered work. These actions infringe copyright if you do not +accept this License. Therefore, by modifying or propagating a covered work, you +indicate your acceptance of this License to do so. + +10. Automatic Licensing of Downstream Recipients +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each time you convey a covered work, the recipient automatically receives a license +from the original licensors, to run, modify and propagate that work, subject to this +License. You are not responsible for enforcing compliance by third parties with this +License. + +An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an organization, or +merging organizations. If propagation of a covered work results from an entity +transaction, each party to that transaction who receives a copy of the work also +receives whatever licenses to the work the party's predecessor in interest had or +could give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if the predecessor +has it or can get it with reasonable efforts. 
+ +You may not impose any further restrictions on the exercise of the rights granted or +affirmed under this License. For example, you may not impose a license fee, royalty, +or other charge for exercise of rights granted under this License, and you may not +initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging +that any patent claim is infringed by making, using, selling, offering for sale, or +importing the Program or any portion of it. + +11. Patents +^^^^^^^^^^^ + +A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter acquired, that +would be infringed by some manner, permitted by this License, of making, using, or +selling its contributor version, but do not include claims that would be infringed +only as a consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant patent +sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license +under the contributor's essential patent claims, to make, use, sell, offer for sale, +import and otherwise run, modify and propagate the contents of its contributor +version. + +In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent (such as an +express permission to practice a patent or covenant not to sue for patent +infringement). To “grant” such a patent license to a party means to make +such an agreement or commitment not to enforce a patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of charge +and under the terms of this License, through a publicly available network server or +other readily accessible means, then you must either (1) cause the Corresponding +Source to be so available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner consistent with +the requirements of this License, to extend the patent license to downstream +recipients. “Knowingly relying” means you have actual knowledge that, but +for the patent license, your conveying the covered work in a country, or your +recipient's use of the covered work in a country, would infringe one or more +identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you +convey, or propagate by procuring conveyance of, a covered work, and grant a patent +license to some of the parties receiving the covered work authorizing them to use, +propagate, modify or convey a specific copy of the covered work, then the patent +license you grant is automatically extended to all recipients of the covered work and +works based on it. + +A patent license is “discriminatory” if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under this +License. 
You may not convey a covered work if you are a party to an arrangement with +a third party that is in the business of distributing software, under which you make +payment to the third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties who would receive +the covered work from you, a discriminatory patent license (a) in connection with +copies of the covered work conveyed by you (or copies made from those copies), or (b) +primarily for and in connection with specific products or compilations that contain +the covered work, unless you entered into that arrangement, or that patent license +was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied +license or other defenses to infringement that may otherwise be available to you +under applicable patent law. + +12. No Surrender of Others' Freedom +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If conditions are imposed on you (whether by court order, agreement or otherwise) +that contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot convey a covered work so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not convey it at all. For example, if you +agree to terms that obligate you to collect a royalty for further conveying from +those to whom you convey the Program, the only way you could satisfy both those terms +and this License would be to refrain entirely from conveying the Program. + +13. Use with the GNU Affero General Public License +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Notwithstanding any other provision of this License, you have permission to link or +combine any covered work with a work licensed under version 3 of the GNU Affero +General Public License into a single combined work, and to convey the resulting work. 
+The terms of this License will continue to apply to the part which is the covered
+work, but the special requirements of the GNU Affero General Public License, section
+13, concerning interaction through a network will apply to the combination as such.
+
+14. Revised Versions of this License
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Free Software Foundation may publish revised and/or new versions of the GNU
+General Public License from time to time. Such new versions will be similar in spirit
+to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies that
+a certain numbered version of the GNU General Public License “or any later
+version” applies to it, you have the option of following the terms and
+conditions either of that numbered version or of any later version published by the
+Free Software Foundation. If the Program does not specify a version number of the GNU
+General Public License, you may choose any version ever published by the Free
+Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU
+General Public License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions. However, no
+additional obligations are imposed on any author or copyright holder as a result of
+your choosing to follow a later version.
+
+15. Disclaimer of Warranty
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE
+QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
+COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
+OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
+WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the disclaimer of warranty and limitation of liability provided above cannot be
+given local legal effect according to their terms, reviewing courts shall apply local
+law that most closely approximates an absolute waiver of all civil liability in
+connection with the Program, unless a warranty or assumption of liability accompanies
+a copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+---------------------------------------------
+
+If you develop a new program, and you want it to be of the greatest possible use to
+the public, the best way to achieve this is to make it free software which everyone
+can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to attach them
+to the start of each source file to most effectively state the exclusion of warranty;
+and each file should have at least the “copyright” line and a pointer to
+where the full notice is found.
+
+.. 
pull-quote::
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year> <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short notice like this
+when it starts in an interactive mode:
+
+.. pull-quote::
+
+    <program> Copyright (C) <year> <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type 'show c' for details.
+
+The hypothetical commands `show w` and `show c` should show the appropriate parts of
+the General Public License. Of course, your program's commands might be different;
+for a GUI interface, you would use an “about box”.
+
+You should also get your employer (if you work as a programmer) or school, if any, to
+sign a “copyright disclaimer” for the program, if necessary. For more
+information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+The GNU General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may consider it
+more useful to permit linking proprietary applications with the library. 
If this is +what you want to do, use the GNU Lesser General Public License instead of this +License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. \ No newline at end of file diff --git a/README.md b/README.md index 6108d5ec014cf07c14be3751894cbfa446f6be09..a0e413f3b195d725b0b754e99fbb53deb8f3e398 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,5 @@ -# evalhyd +# evalhyd-cpp Utility to evaluate deterministic and probabilistic streamflow predictions -Documentation: https://hycar-hydro.gitlab.irstea.page/evalhyd/evalhyd-docs/cpp - -## How to build - -Configure project in debug mode finding libraries in conda environment: -```shell -cmake -B build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_PREFIX_PATH="$CONDA_PREFIX" -``` - -Compile with: -```shell -cmake --build build/ --parallel 2 -``` - -Run tests with: -```shell -./build/tests/evalhyd_tests -``` - -## How to install - -```shell -cmake --install build/ --prefix <path> -``` +Documentation: https://hydrogr.github.io/evalhyd/cpp diff --git a/changelog.rst b/changelog.rst new file mode 100644 index 0000000000000000000000000000000000000000..2b35b9d0de9acd49e5d3a588e0972c4f91844340 --- /dev/null +++ b/changelog.rst @@ -0,0 +1,14 @@ +.. default-role:: obj + +.. + latest + ------ + + Yet to be versioned and released. Only available from *dev* branch until then. + +v0.1.0 +------ + +Released on 2023-04-14. + +* first release diff --git a/environment-dev.yml b/environment.yml similarity index 100% rename from environment-dev.yml rename to environment.yml diff --git a/include/evalhyd/detail/determinist/diagnostics.hpp b/include/evalhyd/detail/determinist/diagnostics.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5e5f96aaf35ecdfdbaae1a6c7e437a4a70c86653 --- /dev/null +++ b/include/evalhyd/detail/determinist/diagnostics.hpp @@ -0,0 +1,61 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. 
+// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_DETERMINIST_DIAGNOSTICS_HPP +#define EVALHYD_DETERMINIST_DIAGNOSTICS_HPP + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + /// Counts the number of time steps available in given period. + /// + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Time step counts. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_t_counts( + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 3> t_counts = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), b_exp[e]); + + // calculate the mean over the time steps + xt::view(t_counts, xt::all(), xt::all(), e) = + xt::sum(t_msk_sampled, -1); + } + + return t_counts; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_DIAGNOSTICS_HPP diff --git a/include/evalhyd/detail/determinist/efficiencies.hpp b/include/evalhyd/detail/determinist/efficiencies.hpp new file mode 100644 index 0000000000000000000000000000000000000000..df893670f3f70e4b900903e9417d368f5a3c0ce4 --- /dev/null +++ b/include/evalhyd/detail/determinist/efficiencies.hpp @@ -0,0 +1,812 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_DETERMINIST_EFFICIENCIES_HPP +#define EVALHYD_DETERMINIST_EFFICIENCIES_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xoperation.hpp> + +#include "../maths.hpp" + + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + /// Compute the Pearson correlation coefficient. + /// + /// \param err_obs + /// Errors between observations and mean observation. + /// shape: (subsets, samples, series, time) + /// \param err_prd + /// Errors between predictions and mean prediction. + /// shape: (subsets, samples, series, time) + /// \param quad_err_obs + /// Quadratic errors between observations and mean observation. + /// shape: (subsets, samples, series, time) + /// \param quad_err_prd + /// Quadratic errors between predictions and mean prediction. + /// shape: (subsets, samples, series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Pearson correlation coefficients. 
+ /// shape: (subsets, samples, series) + inline xt::xtensor<double, 3> calc_r_pearson( + const xt::xtensor<double, 4>& err_obs, + const xt::xtensor<double, 4>& err_prd, + const xt::xtensor<double, 4>& quad_err_obs, + const xt::xtensor<double, 4>& quad_err_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // calculate error in timing and dynamics $r_{pearson}$ + // (Pearson's correlation coefficient) + xt::xtensor<double, 3> r_pearson = + xt::zeros<double>({n_msk, n_exp, n_srs}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + auto prd = xt::view(err_prd, m, e, xt::all(), b_exp[e]); + auto obs = xt::view(err_obs, m, e, xt::all(), b_exp[e]); + auto r_num = xt::nansum(prd * obs, -1); + + auto prd2 = xt::view(quad_err_prd, m, e, xt::all(), b_exp[e]); + auto obs2 = xt::view(quad_err_obs, m, e, xt::all(), b_exp[e]); + auto r_den = xt::sqrt( + xt::nansum(prd2, -1) * xt::nansum(obs2, -1) + ); + + xt::view(r_pearson, m, e) = r_num / r_den; + } + } + + return r_pearson; + } + + /// Compute the Spearman rank correlation coefficient. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (series, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Spearman rank correlation coefficients. 
+ /// shape: (subsets, samples, series) + template <class XD2> + inline xt::xtensor<double, 3> calc_r_spearman( + const XD2& q_obs, + const XD2& q_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // calculate error in timing and dynamics $r_{spearman}$ + // (Spearman's rank correlation coefficient) + xt::xtensor<double, 3> r_spearman = + xt::zeros<double>({n_msk, n_exp, n_srs}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // compute one series at a time because xt::sort does not + // consistently put NaN values at the end/beginning, so + // need to eliminate them before the sorting + for (std::size_t s = 0; s < n_srs; s++) + { + auto prd = xt::view(prd_masked, s, b_exp[e]); + auto obs = xt::view(obs_masked, s, b_exp[e]); + + auto prd_filtered = + xt::filter(prd, !xt::isnan(prd)); + auto obs_filtered = + xt::filter(obs, !xt::isnan(obs)); + + // ------------------------------------------------- + // TODO: use `xt::argsort` with `xt::sorting_method::stable` + // when this becomes possible with `xtensor` to + // consistently sort ties across compilers + // https://github.com/xtensor-stack/xtensor/issues/2677 + // note that the second sorting (to get the + // rank) does not need the stable method + // because there will be no ties after the + // first sorting + auto prd_sort = xt::argsort(xt::eval(prd_filtered)); + auto obs_sort = xt::argsort(xt::eval(obs_filtered)); + // ------------------------------------------------- + + auto prd_rank = xt::eval(xt::argsort(prd_sort)); + auto obs_rank = xt::eval(xt::argsort(obs_sort)); + + auto 
mean_prd_rank = + xt::eval(xt::nanmean(prd_rank)); + auto mean_obs_rank = + xt::eval(xt::nanmean(obs_rank)); + + auto prd_rank_err = xt::eval(prd_rank - mean_prd_rank); + auto obs_rank_err = xt::eval(obs_rank - mean_obs_rank); + + auto r_num = xt::nansum(prd_rank_err * obs_rank_err); + + auto r_den = xt::sqrt( + xt::nansum(xt::square(prd_rank_err)) + * xt::nansum(xt::square(obs_rank_err)) + ); + + xt::view(r_spearman, m, e, s) = r_num / r_den; + } + } + } + + return r_spearman; + } + + /// Compute alpha. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (series, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param mean_obs + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + /// \param mean_prd + /// Mean predicted streamflow. + /// shape: (subsets, samples, series, 1) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Alphas, ratios of standard deviations. 
+ /// shape: (subsets, samples, series) + template <class XD2> + inline xt::xtensor<double, 3> calc_alpha( + const XD2& q_obs, + const XD2& q_prd, + const xt::xtensor<double, 4>& mean_obs, + const xt::xtensor<double, 4>& mean_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // calculate error in spread of flow $alpha$ + xt::xtensor<double, 3> alpha = + xt::zeros<double>({n_msk, n_exp, n_srs}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); + auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); + xt::view(alpha, m, e) = + maths::nanstd(prd, xt::view(mean_prd, m, e)) + / maths::nanstd(obs, xt::view(mean_obs, m, e)); + } + } + + return alpha; + } + + /// Compute gamma. + /// + /// \param mean_obs + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + /// \param mean_prd + /// Mean predicted streamflow. + /// shape: (subsets, samples, series, 1) + /// \param alpha + /// Alphas, ratios of standard deviations. + /// shape: (subsets, samples, series) + /// \return + /// Gammas, ratios of standard deviations normalised by + /// their means. 
+ /// shape: (subsets, samples, series) + inline xt::xtensor<double, 3> calc_gamma( + const xt::xtensor<double, 4>& mean_obs, + const xt::xtensor<double, 4>& mean_prd, + const xt::xtensor<double, 3>& alpha + ) + { + // calculate normalised error in spread of flow $gamma$ + xt::xtensor<double, 3> gamma = + alpha * (xt::view(mean_obs, xt::all(), xt::all(), xt::all(), 0) + / xt::view(mean_prd, xt::all(), xt::all(), xt::all(), 0)); + + return gamma; + } + + /// Compute non-parametric alpha. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (series, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param mean_obs + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + /// \param mean_prd + /// Mean predicted streamflow. + /// shape: (subsets, samples, series, 1) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Non-parametric alphas. 
+ /// shape: (subsets, samples, series) + template <class XD2> + inline xt::xtensor<double, 3> calc_alpha_np( + const XD2& q_obs, + const XD2& q_prd, + const xt::xtensor<double, 4>& mean_obs, + const xt::xtensor<double, 4>& mean_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // calculate error in spread of flow $alpha$ + xt::xtensor<double, 3> alpha_np = + xt::zeros<double>({n_msk, n_exp, n_srs}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // compute one series at a time because xt::sort does not + // consistently put NaN values at the end/beginning, so + // need to eliminate them before the sorting + for (std::size_t s = 0; s < n_srs; s++) + { + auto prd = xt::view(prd_masked, s, b_exp[e]); + auto obs = xt::view(obs_masked, s, b_exp[e]); + + auto prd_filtered = + xt::filter(prd, !xt::isnan(prd)); + auto obs_filtered = + xt::filter(obs, !xt::isnan(obs)); + + auto prd_fdc = xt::sort( + xt::eval(prd_filtered + / (prd_filtered.size() + * xt::view(mean_prd, m, e, s))) + ); + auto obs_fdc = xt::sort( + xt::eval(obs_filtered + / (obs_filtered.size() + * xt::view(mean_obs, m, e, s))) + ); + + xt::view(alpha_np, m, e, s) = + 1 - 0.5 * xt::nansum(xt::abs(prd_fdc - obs_fdc)); + } + } + } + + return alpha_np; + } + + /// Compute the bias. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (series, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param t_msk + /// Temporal subsets of the whole record. 
+ /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Biases. + /// shape: (subsets, samples, series) + template <class XD2> + inline xt::xtensor<double, 3> calc_bias( + const XD2& q_obs, + const XD2& q_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // calculate $bias$ + xt::xtensor<double, 3> bias = + xt::zeros<double>({n_msk, n_exp, n_srs}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); + auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); + xt::view(bias, m, e) = + xt::nansum(prd, -1) / xt::nansum(obs, -1); + } + } + + return bias; + } + } + + namespace metrics + { + /// Compute the Nash-Sutcliffe Efficiency (NSE). + /// + /// \param quad_err + /// Quadratic errors between observations and predictions. + /// shape: (series, time) + /// \param quad_err_obs + /// Quadratic errors between observations and mean observation. + /// shape: (subsets, samples, series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. 
+ /// \return + /// Nash-Sutcliffe efficiencies. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_NSE( + const xt::xtensor<double, 2>& quad_err, + const xt::xtensor<double, 4>& quad_err_obs, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 3> NSE = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto quad_err_masked = xt::where(xt::view(t_msk, xt::all(), m), + quad_err, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // compute squared errors operands + auto err2 = xt::view(quad_err_masked, xt::all(), b_exp[e]); + xt::xtensor<double, 1> f_num = + xt::nansum(err2, -1); + auto obs2 = xt::view(quad_err_obs, m, e, xt::all(), b_exp[e]); + xt::xtensor<double, 1> f_den = + xt::nansum(obs2, -1); + + // compute NSE + xt::view(NSE, xt::all(), m, e) = 1 - (f_num / f_den); + } + } + + return NSE; + } + + /// Compute the Kling-Gupta Efficiency (KGE). + /// + /// \param r_pearson + /// Pearson correlation coefficients. + /// shape: (subsets, samples, series) + /// \param alpha + /// Alphas, ratios of standard deviations. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Kling-Gupta efficiencies. 
+ /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_KGE( + const xt::xtensor<double, 3>& r_pearson, + const xt::xtensor<double, 3>& alpha, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 3> KGE = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // compute KGE + xt::view(KGE, xt::all(), m, e) = 1 - xt::sqrt( + xt::square(xt::view(r_pearson, m, e) - 1) + + xt::square(xt::view(alpha, m, e) - 1) + + xt::square(xt::view(bias, m, e) - 1) + ); + } + } + + return KGE; + } + + /// Compute the Kling-Gupta Efficiency Decomposed (KGE_D) into + /// its three components that are the linear correlation (r), + /// the variability (alpha), and the bias (beta), in this order. + /// + /// \param r_pearson + /// Pearson correlation coefficients. + /// shape: (subsets, samples, series) + /// \param alpha + /// Alphas, ratios of standard deviations. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// KGE components (r, alpha, beta) for each subset + /// and for each threshold. 
+ /// shape: (series, subsets, samples, 3) + inline xt::xtensor<double, 4> calc_KGE_D( + const xt::xtensor<double, 3>& r_pearson, + const xt::xtensor<double, 3>& alpha, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> KGE_D = + xt::zeros<double>({n_srs, n_msk, n_exp, std::size_t {3}}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // put KGE components together + xt::view(KGE_D, xt::all(), m, e, 0) = + xt::view(r_pearson, m, e); + xt::view(KGE_D, xt::all(), m, e, 1) = + xt::view(alpha, m, e); + xt::view(KGE_D, xt::all(), m, e, 2) = + xt::view(bias, m, e); + } + } + + return KGE_D; + } + + /// Compute the modified Kling-Gupta Efficiency (KGEPRIME). + /// + /// \param r_pearson + /// Pearson correlation coefficients. + /// shape: (subsets, samples, series) + /// \param gamma + /// Gammas, ratios of standard deviations normalised by + /// their means. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Modified Kling-Gupta efficiencies. 
+ /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_KGEPRIME( + const xt::xtensor<double, 3>& r_pearson, + const xt::xtensor<double, 3>& gamma, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 3> KGEPRIME = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // compute KGEPRIME + xt::view(KGEPRIME, xt::all(), m, e) = 1 - xt::sqrt( + xt::square(xt::view(r_pearson, m, e) - 1) + + xt::square(xt::view(gamma, m, e) - 1) + + xt::square(xt::view(bias, m, e) - 1) + ); + } + } + + return KGEPRIME; + } + + /// Compute the modified Kling-Gupta Efficiency Decomposed + /// (KGEPRIME_D) into its three components that are the linear + /// correlation (r), the variability (gamma), and the bias (beta), + /// in this order. + /// + /// \param r_pearson + /// Pearson correlation coefficients. + /// shape: (subsets, samples, series) + /// \param gamma + /// Gammas, ratios of standard deviations normalised by + /// their means. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Modified KGE components (r, gamma, beta) for each subset + /// and for each threshold. 
+ /// shape: (series, subsets, samples, 3) + inline xt::xtensor<double, 4> calc_KGEPRIME_D( + const xt::xtensor<double, 3>& r_pearson, + const xt::xtensor<double, 3>& gamma, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> KGEPRIME_D = + xt::zeros<double>({n_srs, n_msk, n_exp, std::size_t {3}}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // put KGE components together + xt::view(KGEPRIME_D, xt::all(), m, e, 0) = + xt::view(r_pearson, m, e); + xt::view(KGEPRIME_D, xt::all(), m, e, 1) = + xt::view(gamma, m, e); + xt::view(KGEPRIME_D, xt::all(), m, e, 2) = + xt::view(bias, m, e); + } + } + + return KGEPRIME_D; + } + + /// Compute the non-parametric Kling-Gupta Efficiency (KGENP). + /// + /// \param r_spearman + /// Spearman rank correlation coefficients. + /// shape: (subsets, samples, series) + /// \param alpha_np + /// Non-parametric alphas. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Modified Kling-Gupta efficiencies. 
+ /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_KGENP( + const xt::xtensor<double, 3>& r_spearman, + const xt::xtensor<double, 3>& alpha_np, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 3> KGENP = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // compute KGEPRIME + xt::view(KGENP, xt::all(), m, e) = 1 - xt::sqrt( + xt::square(xt::view(r_spearman, m, e) - 1) + + xt::square(xt::view(alpha_np, m, e) - 1) + + xt::square(xt::view(bias, m, e) - 1) + ); + } + } + + return KGENP; + } + + /// Compute the non-parametric Kling-Gupta Efficiency + /// Decomposed (KGENP) into its three components that are the rank + /// correlation (r), the variability (non-parametric alpha), and + /// the bias (beta), in this order. + /// + /// \param r_spearman + /// Spearman correlation coefficients. + /// shape: (subsets, samples, series) + /// \param alpha_np + /// Non-parametric alphas. + /// shape: (subsets, samples, series) + /// \param bias + /// Biases. + /// shape: (subsets, samples, series) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Modified Kling-Gupta efficiencies. 
+ /// shape: (series, subsets, samples) + inline xt::xtensor<double, 4> calc_KGENP_D( + const xt::xtensor<double, 3>& r_spearman, + const xt::xtensor<double, 3>& alpha_np, + const xt::xtensor<double, 3>& bias, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> KGENP_D = + xt::zeros<double>({n_srs, n_msk, n_exp, std::size_t {3}}); + + for (std::size_t m = 0; m < n_msk; m++) + { + for (std::size_t e = 0; e < n_exp; e++) + { + // put KGE components together + xt::view(KGENP_D, xt::all(), m, e, 0) = + xt::view(r_spearman, m, e); + xt::view(KGENP_D, xt::all(), m, e, 1) = + xt::view(alpha_np, m, e); + xt::view(KGENP_D, xt::all(), m, e, 2) = + xt::view(bias, m, e); + } + } + + return KGENP_D; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_EFFICIENCIES_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/determinist/errors.hpp b/include/evalhyd/detail/determinist/errors.hpp new file mode 100644 index 0000000000000000000000000000000000000000..646bbd4ad0776c7c97c4cb4f5a0bbbb7922125a3 --- /dev/null +++ b/include/evalhyd/detail/determinist/errors.hpp @@ -0,0 +1,481 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_DETERMINIST_ERRORS_HPP +#define EVALHYD_DETERMINIST_ERRORS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xoperation.hpp> + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + /// Compute the mean of the observations. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (1, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. 
+ /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + template <class XD2> + inline xt::xtensor<double, 4> calc_mean_obs( + const XD2& q_obs, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> mean_obs = + xt::zeros<double>({n_msk, n_exp, n_srs, std::size_t {1}}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); + xt::view(mean_obs, m, e) = + xt::nanmean(obs, -1, xt::keep_dims); + } + } + + return mean_obs; + } + + /// Compute the mean of the predictions. + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Mean predicted streamflow. 
+ /// shape: (subsets, samples, series, 1) + template <class XD2> + inline xt::xtensor<double, 4> calc_mean_prd( + const XD2& q_prd, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> mean_prd = + xt::zeros<double>({n_msk, n_exp, n_srs, std::size_t {1}}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); + xt::view(mean_prd, m, e) = + xt::nanmean(prd, -1, xt::keep_dims); + } + } + + return mean_prd; + } + + /// Compute the error between observations and predictions. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (1, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \return + /// Errors between observations and predictions. + /// shape: (series, time) + template <class XD2> + inline xt::xtensor<double, 2> calc_err( + const XD2& q_obs, + const XD2& q_prd + ) + { + return q_obs - q_prd; + } + + /// Compute the absolute error between observations and predictions. + /// + /// \param err + /// Errors between observations and predictions. + /// shape: (series, time) + /// \return + /// Quadratic errors between observations and predictions. + /// shape: (series, time) + inline xt::xtensor<double, 2> calc_abs_err( + const xt::xtensor<double, 2>& err + ) + { + return xt::abs(err); + } + + /// Compute the quadratic error between observations and predictions. + /// + /// \param err + /// Errors between observations and predictions. + /// shape: (series, time) + /// \return + /// Quadratic errors between observations and predictions. 
+ /// shape: (series, time) + inline xt::xtensor<double, 2> calc_quad_err( + const xt::xtensor<double, 2>& err + ) + { + return xt::square(err); + } + + /// Compute the error between observations and mean observation. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (series, time) + /// \param mean_obs + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param n_srs + /// Number of prediction series. + /// \param n_tim + /// Number of time steps. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Errors between observations and mean observation. + /// shape: (subsets, samples, series, time) + template <class XD2> + inline xt::xtensor<double, 4> calc_err_obs( + const XD2& q_obs, + const xt::xtensor<double, 4>& mean_obs, + const xt::xtensor<bool, 3>& t_msk, + std::size_t n_srs, + std::size_t n_tim, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> err_obs = + xt::zeros<double>({n_msk, n_exp, n_srs, n_tim}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_obs, NAN); + + for (std::size_t e = 0; e < n_exp; e++) + { + xt::view(err_obs, m, e) = ( + obs_masked - xt::view(mean_obs, m, e) + ); + } + } + + return err_obs; + } + + /// Compute the quadratic error between observations and mean observation. + /// + /// \param err_obs + /// Errors between observations and mean observation. + /// shape: (subsets, samples, series, time) + /// \return + /// Quadratic errors between observations and mean observation. 
+ /// shape: (subsets, samples, series, time) + inline xt::xtensor<double, 4> calc_quad_err_obs( + const xt::xtensor<double, 4>& err_obs + ) + { + return xt::square(err_obs); + } + + /// Compute the error between predictions and mean prediction. + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param mean_prd + /// Mean predicted streamflow. + /// shape: (subsets, samples, series, 1) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param n_srs + /// Number of prediction series. + /// \param n_tim + /// Number of time steps. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Errors between predictions and mean prediction. + /// shape: (subsets, samples, series, time) + template <class XD2> + inline xt::xtensor<double, 4> calc_err_prd( + const XD2& q_prd, + const xt::xtensor<double, 4>& mean_prd, + const xt::xtensor<bool, 3>& t_msk, + std::size_t n_srs, + std::size_t n_tim, + std::size_t n_msk, + std::size_t n_exp + ) + { + xt::xtensor<double, 4> quad_err_prd = + xt::zeros<double>({n_msk, n_exp, n_srs, n_tim}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m), + q_prd, NAN); + + for (std::size_t e = 0; e < n_exp; e++) + { + xt::view(quad_err_prd, m, e) = ( + prd_masked - xt::view(mean_prd, m, e) + ); + } + } + + return quad_err_prd; + } + + /// Compute the quadratic error between predictions and mean prediction. + /// + /// \param err_prd + /// Errors between predictions and mean prediction. + /// shape: (subsets, samples, series, time) + /// \return + /// Quadratic errors between predictions and mean prediction. 
+ /// shape: (subsets, samples, series, time) + inline xt::xtensor<double, 4> calc_quad_err_prd( + const xt::xtensor<double, 4>& err_prd + ) + { + return xt::square(err_prd); + } + } + + namespace metrics + { + /// Compute the mean absolute error (MAE). + /// + /// \param abs_err + /// Absolute errors between observations and predictions. + /// shape: (series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Mean absolute errors. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_MAE( + const xt::xtensor<double, 2>& abs_err, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // compute RMSE + xt::xtensor<double, 3> MAE = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto abs_err_masked = xt::where(xt::view(t_msk, xt::all(), m), + abs_err, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + auto err = xt::view(abs_err_masked, xt::all(), b_exp[e]); + xt::view(MAE, xt::all(), m, e) = xt::nanmean(err, -1); + } + } + + return MAE; + } + + /// Compute the mean absolute relative error (MARE). + /// + /// \param MAE + /// Mean absolute errors. + /// shape: (series, subsets, samples) + /// \param mean_obs + /// Mean observed streamflow. + /// shape: (subsets, samples, series, 1) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. 
+ /// \return + /// Mean absolute relative errors. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_MARE( + const xt::xtensor<double, 3>& MAE, + const xt::xtensor<double, 4>& mean_obs, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // compute RMSE + xt::xtensor<double, 3> MARE = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + xt::view(MARE, xt::all(), m, e) = + xt::view(MAE, xt::all(), m, e) + / xt::view(mean_obs, m, e, xt::all(), 0); + } + } + + return MARE; + } + + /// Compute the mean square error (MSE). + /// + /// \param quad_err + /// Quadratic errors between observations and predictions. + /// shape: (series, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Mean square errors. 
+ /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_MSE( + const xt::xtensor<double, 2>& quad_err, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // compute RMSE + xt::xtensor<double, 3> MSE = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto quad_err_masked = xt::where(xt::view(t_msk, xt::all(), m), + quad_err, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + auto err2 = xt::view(quad_err_masked, xt::all(), b_exp[e]); + xt::view(MSE, xt::all(), m, e) = xt::nanmean(err2, -1); + } + } + + return MSE; + } + + /// Compute the root mean square error (RMSE). + /// + /// \param MSE + /// Mean square errors. + /// shape: (series, subsets, samples) + /// \return + /// Root mean square errors. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_RMSE( + const xt::xtensor<double, 3>& MSE + ) + { + // compute RMSE + auto RMSE = xt::sqrt(MSE); + + return RMSE; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_ERRORS_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bb8202516edd63f317b746b1f66340fecf9f089c --- /dev/null +++ b/include/evalhyd/detail/determinist/evaluator.hpp @@ -0,0 +1,567 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_DETERMINIST_EVALUATOR_HPP +#define EVALHYD_DETERMINIST_EVALUATOR_HPP + +#include <vector> + +#include <xtl/xoptional.hpp> +#include <xtensor/xexpression.hpp> +#include <xtensor/xtensor.hpp> + +#include "diagnostics.hpp" +#include "errors.hpp" +#include "efficiencies.hpp" +#include "events.hpp" + + +namespace evalhyd +{ + namespace determinist + { + template <class XD2, class XB3> + class Evaluator + { + private: + // members for input data + const XD2& q_obs; + const XD2& q_prd; + // members for optional input data + const XD2& _q_thr; + xtl::xoptional<const std::string, bool> _events; + xt::xtensor<bool, 3> t_msk; + const std::vector<xt::xkeep_slice<int>>& b_exp; + + // members for dimensions + std::size_t n_tim; + std::size_t n_msk; + std::size_t n_srs; + std::size_t n_thr; + std::size_t n_exp; + + // members for computational elements + xtl::xoptional<xt::xtensor<double, 3>, bool> t_counts; + xtl::xoptional<xt::xtensor<double, 4>, bool> mean_obs; + xtl::xoptional<xt::xtensor<double, 4>, bool> mean_prd; + xtl::xoptional<xt::xtensor<double, 2>, bool> err; + xtl::xoptional<xt::xtensor<double, 2>, bool> abs_err; + xtl::xoptional<xt::xtensor<double, 2>, bool> quad_err; + xtl::xoptional<xt::xtensor<double, 4>, bool> err_obs; + xtl::xoptional<xt::xtensor<double, 4>, bool> quad_err_obs; + xtl::xoptional<xt::xtensor<double, 4>, bool> err_prd; + xtl::xoptional<xt::xtensor<double, 4>, bool> quad_err_prd; + xtl::xoptional<xt::xtensor<double, 3>, bool> r_pearson; + xtl::xoptional<xt::xtensor<double, 3>, bool> r_spearman; + xtl::xoptional<xt::xtensor<double, 3>, bool> alpha; + xtl::xoptional<xt::xtensor<double, 3>, bool> gamma; + xtl::xoptional<xt::xtensor<double, 3>, bool> alpha_np; + xtl::xoptional<xt::xtensor<double, 3>, bool> bias; + xtl::xoptional<xt::xtensor<double, 3>, bool> obs_event; + xtl::xoptional<xt::xtensor<double, 3>, bool> prd_event; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_a; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_b; + 
xtl::xoptional<xt::xtensor<double, 3>, bool> ct_c; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_d; + + // members for evaluation metrics + xtl::xoptional<xt::xtensor<double, 3>, bool> MAE; + xtl::xoptional<xt::xtensor<double, 3>, bool> MARE; + xtl::xoptional<xt::xtensor<double, 3>, bool> MSE; + xtl::xoptional<xt::xtensor<double, 3>, bool> RMSE; + xtl::xoptional<xt::xtensor<double, 3>, bool> NSE; + xtl::xoptional<xt::xtensor<double, 3>, bool> KGE; + xtl::xoptional<xt::xtensor<double, 4>, bool> KGE_D; + xtl::xoptional<xt::xtensor<double, 3>, bool> KGEPRIME; + xtl::xoptional<xt::xtensor<double, 4>, bool> KGEPRIME_D; + xtl::xoptional<xt::xtensor<double, 3>, bool> KGENP; + xtl::xoptional<xt::xtensor<double, 4>, bool> KGENP_D; + xtl::xoptional<xt::xtensor<double, 5>, bool> CONT_TBL; + + // methods to get optional parameters + auto get_q_thr() + { + if (_q_thr.size() < 1) + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *q_thr* not provided" + ); + } + else{ + return _q_thr; + } + } + + bool is_high_flow_event() + { + if (_events.has_value()) + { + if (_events.value() == "high") + { + return true; + } + else if (_events.value() == "low") + { + return false; + } + else + { + throw std::runtime_error( + "invalid value for *events*: " + _events.value() + ); + } + } + else + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *events* not provided" + ); + } + } + + // methods to compute elements + xt::xtensor<double, 3> get_t_counts() + { + if (!t_counts.has_value()) + { + t_counts = elements::calc_t_counts( + t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return t_counts.value(); + }; + + xt::xtensor<double, 4> get_mean_obs() + { + if (!mean_obs.has_value()) + { + mean_obs = elements::calc_mean_obs( + q_obs, t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return mean_obs.value(); + }; + + xt::xtensor<double, 4> get_mean_prd() + { + if (!mean_prd.has_value()) + { + mean_prd = elements::calc_mean_prd( + q_prd, t_msk, b_exp, 
n_srs, n_msk, n_exp + ); + } + return mean_prd.value(); + }; + + xt::xtensor<double, 2> get_err() + { + if (!err.has_value()) + { + err = elements::calc_err( + q_obs, q_prd + ); + } + return err.value(); + }; + + xt::xtensor<double, 2> get_abs_err() + { + if (!abs_err.has_value()) + { + abs_err = elements::calc_abs_err( + get_err() + ); + } + return abs_err.value(); + }; + + xt::xtensor<double, 2> get_quad_err() + { + if (!quad_err.has_value()) + { + quad_err = elements::calc_quad_err( + get_err() + ); + } + return quad_err.value(); + }; + + xt::xtensor<double, 4> get_err_obs() + { + if (!err_obs.has_value()) + { + err_obs = elements::calc_err_obs( + q_obs, get_mean_obs(), t_msk, + n_srs, n_tim, n_msk, n_exp + ); + } + return err_obs.value(); + }; + + xt::xtensor<double, 4> get_quad_err_obs() + { + if (!quad_err_obs.has_value()) + { + quad_err_obs = elements::calc_quad_err_obs( + get_err_obs() + ); + } + return quad_err_obs.value(); + }; + + xt::xtensor<double, 4> get_err_prd() + { + if (!err_prd.has_value()) + { + err_prd = elements::calc_err_prd( + q_prd, get_mean_prd(), t_msk, + n_srs, n_tim, n_msk, n_exp + ); + } + return err_prd.value(); + }; + + xt::xtensor<double, 4> get_quad_err_prd() + { + if (!quad_err_prd.has_value()) + { + quad_err_prd = elements::calc_quad_err_prd( + get_err_prd() + ); + } + return quad_err_prd.value(); + }; + + xt::xtensor<double, 3> get_r_pearson() + { + if (!r_pearson.has_value()) + { + r_pearson = elements::calc_r_pearson( + get_err_obs(), get_err_prd(), + get_quad_err_obs(), get_quad_err_prd(), + t_msk, b_exp, + n_srs, n_msk, n_exp + ); + } + return r_pearson.value(); + }; + + xt::xtensor<double, 3> get_r_spearman() + { + if (!r_spearman.has_value()) + { + r_spearman = elements::calc_r_spearman( + q_obs, q_prd, t_msk, b_exp, + n_srs, n_msk, n_exp + ); + } + return r_spearman.value(); + }; + + xt::xtensor<double, 3> get_alpha() + { + if (!alpha.has_value()) + { + alpha = elements::calc_alpha( + q_obs, q_prd, get_mean_obs(), 
get_mean_prd(), + t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return alpha.value(); + }; + + xt::xtensor<double, 3> get_gamma() + { + if (!gamma.has_value()) + { + gamma = elements::calc_gamma( + get_mean_obs(), get_mean_prd(), get_alpha() + ); + } + return gamma.value(); + }; + + xt::xtensor<double, 3> get_alpha_np() + { + if (!alpha_np.has_value()) + { + alpha_np = elements::calc_alpha_np( + q_obs, q_prd, get_mean_obs(), get_mean_prd(), + t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return alpha_np.value(); + }; + + xt::xtensor<double, 3> get_bias() + { + if (!bias.has_value()) + { + bias = elements::calc_bias( + q_obs, q_prd, t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return bias.value(); + }; + + xt::xtensor<double, 3> get_obs_event() + { + if (!obs_event.has_value()) + { + obs_event = elements::calc_obs_event( + q_obs, get_q_thr(), is_high_flow_event() + ); + } + return obs_event.value(); + }; + + xt::xtensor<double, 3> get_prd_event() + { + if (!prd_event.has_value()) + { + prd_event = elements::calc_prd_event( + q_prd, get_q_thr(), is_high_flow_event() + ); + } + return prd_event.value(); + }; + + xt::xtensor<double, 3> get_ct_a() + { + if (!ct_a.has_value()) + { + ct_a = elements::calc_ct_a( + get_obs_event(), get_prd_event() + ); + } + return ct_a.value(); + }; + + xt::xtensor<double, 3> get_ct_b() + { + if (!ct_b.has_value()) + { + ct_b = elements::calc_ct_b( + get_obs_event(), get_prd_event() + ); + } + return ct_b.value(); + }; + + xt::xtensor<double, 3> get_ct_c() + { + if (!ct_c.has_value()) + { + ct_c = elements::calc_ct_c( + get_obs_event(), get_prd_event() + ); + } + return ct_c.value(); + }; + + xt::xtensor<double, 3> get_ct_d() + { + if (!ct_d.has_value()) + { + ct_d = elements::calc_ct_d( + get_obs_event(), get_prd_event() + ); + } + return ct_d.value(); + }; + + public: + // constructor method + Evaluator(const XD2& obs, + const XD2& prd, + const XD2& thr, + xtl::xoptional<const std::string&, bool> events, + const XB3& msk, + const 
std::vector<xt::xkeep_slice<int>>& exp) : + q_obs{obs}, q_prd{prd}, + _q_thr{thr}, _events{events}, + t_msk{msk}, b_exp{exp} + { + // initialise a mask if none provided + // (corresponding to no temporal subset) + if (msk.size() < 1) + { + t_msk = xt::ones<bool>( + {q_prd.shape(0), std::size_t {1}, q_prd.shape(1)} + ); + } + + // determine size for recurring dimensions + n_srs = q_prd.shape(0); + n_tim = q_prd.shape(1); + n_msk = t_msk.shape(1); + n_thr = _q_thr.shape(1); + n_exp = b_exp.size(); + + // drop time steps where observations or predictions are NaN + for (std::size_t s = 0; s < n_srs; s++) + { + auto obs_nan = xt::isnan(xt::view(q_obs, 0)); + auto prd_nan = xt::isnan(xt::view(q_prd, s)); + + auto msk_nan = xt::where(obs_nan || prd_nan)[0]; + + xt::view(t_msk, s, xt::all(), xt::keep(msk_nan)) = false; + } + }; + + // methods to compute metrics + xt::xtensor<double, 3> get_MAE() + { + if (!MAE.has_value()) + { + MAE = metrics::calc_MAE( + get_abs_err(), t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return MAE.value(); + }; + + xt::xtensor<double, 3> get_MARE() + { + if (!MARE.has_value()) + { + MARE = metrics::calc_MARE( + get_MAE(), get_mean_obs(), n_srs, n_msk, n_exp + ); + } + return MARE.value(); + }; + + xt::xtensor<double, 3> get_MSE() + { + if (!MSE.has_value()) + { + MSE = metrics::calc_MSE( + get_quad_err(), t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return MSE.value(); + }; + + xt::xtensor<double, 3> get_RMSE() + { + if (!RMSE.has_value()) + { + RMSE = metrics::calc_RMSE( + get_MSE() + ); + } + return RMSE.value(); + }; + + xt::xtensor<double, 3> get_NSE() + { + if (!NSE.has_value()) + { + NSE = metrics::calc_NSE( + get_quad_err(), get_quad_err_obs(), t_msk, b_exp, + n_srs, n_msk, n_exp + ); + } + return NSE.value(); + }; + + xt::xtensor<double, 3> get_KGE() + { + if (!KGE.has_value()) + { + KGE = metrics::calc_KGE( + get_r_pearson(), get_alpha(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGE.value(); + }; + + xt::xtensor<double, 4> 
get_KGE_D() + { + if (!KGE_D.has_value()) + { + KGE_D = metrics::calc_KGE_D( + get_r_pearson(), get_alpha(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGE_D.value(); + }; + + xt::xtensor<double, 3> get_KGEPRIME() + { + if (!KGEPRIME.has_value()) + { + KGEPRIME = metrics::calc_KGEPRIME( + get_r_pearson(), get_gamma(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGEPRIME.value(); + }; + + xt::xtensor<double, 4> get_KGEPRIME_D() + { + if (!KGEPRIME_D.has_value()) + { + KGEPRIME_D = metrics::calc_KGEPRIME_D( + get_r_pearson(), get_gamma(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGEPRIME_D.value(); + }; + + xt::xtensor<double, 3> get_KGENP() + { + if (!KGENP.has_value()) + { + KGENP = metrics::calc_KGENP( + get_r_spearman(), get_alpha_np(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGENP.value(); + }; + + xt::xtensor<double, 4> get_KGENP_D() + { + if (!KGENP_D.has_value()) + { + KGENP_D = metrics::calc_KGENP_D( + get_r_spearman(), get_alpha_np(), get_bias(), + n_srs, n_msk, n_exp + ); + } + return KGENP_D.value(); + }; + + xt::xtensor<double, 5> get_CONT_TBL() + { + if (!CONT_TBL.has_value()) + { + CONT_TBL = metrics::calc_CONT_TBL( + get_q_thr(), get_ct_a(), get_ct_b(), get_ct_c(), + get_ct_d(), t_msk, b_exp, + n_srs, n_thr, n_msk, n_exp + ); + } + return CONT_TBL.value(); + }; + + // methods to compute diagnostics + xt::xtensor<double, 3> get_completeness() + { + return get_t_counts(); + }; + }; + } +} + +#endif //EVALHYD_DETERMINIST_EVALUATOR_HPP diff --git a/include/evalhyd/detail/determinist/events.hpp b/include/evalhyd/detail/determinist/events.hpp new file mode 100644 index 0000000000000000000000000000000000000000..794ccef8990fb313ccfec0e82d8746049549ab84 --- /dev/null +++ b/include/evalhyd/detail/determinist/events.hpp @@ -0,0 +1,279 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_DETERMINIST_EVENTS_HPP +#define EVALHYD_DETERMINIST_EVENTS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xmasked_view.hpp> +#include <xtensor/xmath.hpp> + + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + // Contingency table: + // + // OBS + // Y N + // +-----+-----+ a: hits + // Y | a | b | b: false alarms + // PRD +-----+-----+ c: misses + // N | c | d | d: correct rejections + // +-----+-----+ + // + + /// Determine observed realisation of threshold(s) exceedance. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (1, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Event observed outcome. + /// shape: (series, thresholds, time) + template<class XD2> + inline xt::xtensor<double, 3> calc_obs_event( + const XD2& q_obs, + const XD2& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // observations above threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + else + { + // observations below threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + } + + /// Determine predicted realisation of threshold(s) exceedance. + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Event predicted outcome. 
+ /// shape: (series, thresholds, time) + template<class XD2> + inline xt::xtensor<double, 3> calc_prd_event( + const XD2& q_prd, + const XD2& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // observations above threshold(s) + return xt::view(q_prd, xt::all(), xt::newaxis(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + else + { + // observations below threshold(s) + return xt::view(q_prd, xt::all(), xt::newaxis(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + } + + /// Determine hits ('a' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Hits. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_a( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 1.) && xt::equal(prd_event, 1.); + } + + /// Determine false alarms ('b' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// False alarms. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_b( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 0.) && xt::equal(prd_event, 1.); + } + + /// Determine misses ('c' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Misses. 
+ /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_c( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 1.) && xt::equal(prd_event, 0.); + } + + /// Determine correct rejections ('d' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Correct rejections. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_d( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 0.) && xt::equal(prd_event, 0.); + } + } + + namespace metrics + { + /// Compute the cells of the contingency table (CONT_TBL), + /// i.e. 'hits', 'false alarms', 'misses', 'correct rejections', + /// in this order. + /// + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param ct_a + /// Hits for each time step. + /// shape: (series, thresholds, time) + /// \param ct_b + /// False alarms for each time step. + /// shape: (series, thresholds, time) + /// \param ct_c + /// Misses for each time step. + /// shape: (series, thresholds, time) + /// \param ct_d + /// Correct rejections for each time step. + /// shape: (series, thresholds, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_thr + /// Number of thresholds. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Probabilities of detection. 
+ /// shape: (series, subsets, samples, thresholds, cells) + template<class XD2> + inline xt::xtensor<double, 5> calc_CONT_TBL( + const XD2& q_thr, + const xt::xtensor<double, 3>& ct_a, + const xt::xtensor<double, 3>& ct_b, + const xt::xtensor<double, 3>& ct_c, + const xt::xtensor<double, 3>& ct_d, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> CONT_TBL = + xt::zeros<double>({n_srs, n_msk, n_exp, + n_thr, std::size_t {4}}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + std::size_t i = 0; + for (auto cell: {ct_a, ct_b, ct_c, ct_d}) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto cell_masked = xt::where( + xt::view(t_msk, xt::all(), m, xt::newaxis(), xt::all()), + cell, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto cell_masked_sampled = + xt::view(cell_masked, xt::all(), xt::all(), + b_exp[e]); + + // calculate the mean over the time steps + xt::view(CONT_TBL, xt::all(), m, e, xt::all(), i) = + xt::nansum(cell_masked_sampled, -1); + } + + i++; + } + } + + // assign NaN where thresholds were not provided (i.e. 
set as NaN) + xt::masked_view( + CONT_TBL, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), + xt::newaxis())) + ) = NAN; + + return CONT_TBL; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_EVENTS_HPP diff --git a/src/masks.hpp b/include/evalhyd/detail/masks.hpp similarity index 76% rename from src/masks.hpp rename to include/evalhyd/detail/masks.hpp index d4a1c6bd8c9e09013bb4c23b15165f3ea67ee5a1..60b94f229a8d2ca94045e90999371c7b3abc3486 100644 --- a/src/masks.hpp +++ b/include/evalhyd/detail/masks.hpp @@ -1,3 +1,7 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + #ifndef EVALHYD_MASKS_HPP #define EVALHYD_MASKS_HPP @@ -15,9 +19,6 @@ #include <xtensor/xsort.hpp> #include <xtensor/xindex_view.hpp> -#include "maths.hpp" - -namespace eh = evalhyd; typedef std::map<std::string, std::vector<std::vector<std::string>>> msk_tree; @@ -62,32 +63,42 @@ namespace evalhyd std::set<std::string> supported_op = {"<", ">", "<=", ">=", "!=", "=="}; if (mt[1].str().empty()) + { throw std::runtime_error( "missing operator for streamflow masking condition" ); + } else if (supported_op.find(mt[1]) != supported_op.end()) { if ((mt[2].str() == "median") || (mt[2].str() == "mean") || (mt[2].str() == "quantile")) + { conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()}); + } else + { // it is a simple numerical value, swap last two conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()}); + } } else + { throw std::runtime_error( "invalid operator for streamflow masking " "condition: " + mt[1].str() ); + } } // check that a maximum of two conditions were provided if (conditions.size() > 2) + { throw std::runtime_error( "no more than two streamflow masking conditions " "can be provided" ); + } subset[var] = conditions; } @@ -119,11 +130,15 @@ namespace evalhyd // check whether it is all indices, a range of indices, or an index if 
(m[1] == ":") + { // it is all indices (i.e. t{:}) so keep everything condition.emplace_back(); + } else if (m[2].str().empty()) + { // it is an index (i.e. t{#}) condition.push_back({m[1].str()}); + } else { // it is a range of indices (i.e. t{#:#}) @@ -148,10 +163,11 @@ namespace evalhyd } /// Function to generate temporal mask based on masking conditions + template<class X1, class X2> inline xt::xtensor<bool, 1> generate_mask_from_conditions( const std::array<char, 32>& msk_char_arr, - const xt::xtensor<double, 1>& q_obs, - const xt::xtensor<double, 2>& q_prd = {} + const X1& q_obs, + const X2& q_prd ) { // parse string to identify masking conditions @@ -160,9 +176,11 @@ namespace evalhyd // check if conditions were found in parsing if (subset.empty()) + { throw std::runtime_error( "no valid condition found to generate mask(s)" ); + } // initialise a boolean expression for the masks xt::xtensor<bool, 1> t_msk = xt::zeros<bool>(q_obs.shape()); @@ -179,25 +197,33 @@ namespace evalhyd { // preprocess streamflow depending on kind auto get_q = [&]() { - if (var == "q_obs") { - return q_obs; + if (var == "q_obs") + { + return xt::xtensor<double, 1>(q_obs); } - else if (var == "q_prd_median") { - if (q_prd.size() < 1) + else if (var == "q_prd_median") + { + if (q_prd.shape(0) == 1) + { throw std::runtime_error( "condition on streamflow predictions " "not allowed for generating masks" ); + } xt::xtensor<double, 1> q_prd_median = xt::median(q_prd, 0); return q_prd_median; } - else { // i.e. (var == "q_prd_mean") - if (q_prd.size() < 1) + else + { + // i.e. 
(var == "q_prd_mean") + if (q_prd.shape(0) == 1) + { throw std::runtime_error( "condition on streamflow predictions " "not allowed for generating masks" ); + } xt::xtensor<double, 1> q_prd_mean = xt::mean(q_prd, 0); return q_prd_mean; @@ -206,16 +232,27 @@ namespace evalhyd auto q = get_q(); // define lambda function to precompute mean/median/quantile - auto get_val = [&](const std::string& str, const std::string& num) { - if (str.empty()) - // it is a simple numerical value + + + auto get_val = + [&](const std::string& str, const std::string& num) + { + if (str.empty()) // it is a simple numerical value + { return std::stod(num); + } else if (str == "median") + { return xt::median(q); + } else if (str == "mean") + { return xt::mean(q)(); + } else // (str == "quantile") - return eh::maths::quantile(q, std::stod(num)); + { + return xt::quantile(q, {std::stod(num)})(); + } }; // preprocess conditions to identify special cases @@ -238,8 +275,13 @@ namespace evalhyd if ((opr2 == ">") || (opr2 == ">=")) { if (val2 > val1) + { without = true; - else { within = true; } + } + else + { + within = true; + } } } else if ((opr1 == ">") || (opr1 == ">=")) @@ -247,8 +289,13 @@ namespace evalhyd if ((opr2 == "<") || (opr2 == "<=")) { if (val2 > val1) + { within = true; - else { without = true; } + } + else + { + without = true; + } } } } @@ -258,58 +305,90 @@ namespace evalhyd if (within) { if ((opr1 == "<") && (opr2 == ">")) - t_msk = xt::where((q < val1) & (q > val2), + { + t_msk = xt::where((q < val1) && (q > val2), 1, t_msk); + } else if ((opr1 == "<=") && (opr2 == ">")) - t_msk = xt::where((q <= val1) & (q > val2), + { + t_msk = xt::where((q <= val1) && (q > val2), 1, t_msk); + } else if ((opr1 == "<") && (opr2 == ">=")) - t_msk = xt::where((q < val1) & (q >= val2), + { + t_msk = xt::where((q < val1) && (q >= val2), 1, t_msk); + } else if ((opr1 == "<=") && (opr2 == ">=")) - t_msk = xt::where((q <= val1) & (q >= val2), + { + t_msk = xt::where((q <= val1) && (q >= val2), 1, 
t_msk); + } if ((opr2 == "<") && (opr1 == ">")) - t_msk = xt::where((q < val2) & (q > val1), + { + t_msk = xt::where((q < val2) && (q > val1), 1, t_msk); + } else if ((opr2 == "<=") && (opr1 == ">")) - t_msk = xt::where((q <= val2) & (q > val1), + { + t_msk = xt::where((q <= val2) && (q > val1), 1, t_msk); + } else if ((opr2 == "<") && (opr1 == ">=")) - t_msk = xt::where((q < val2) & (q >= val1), + { + t_msk = xt::where((q < val2) && (q >= val1), 1, t_msk); + } else if ((opr2 == "<=") && (opr1 == ">=")) - t_msk = xt::where((q <= val2) & (q >= val1), + { + t_msk = xt::where((q <= val2) && (q >= val1), 1, t_msk); + } } else if (without) { if ((opr1 == "<") && (opr2 == ">")) - t_msk = xt::where((q < val1) | (q > val2), + { + t_msk = xt::where((q < val1) || (q > val2), 1, t_msk); + } else if ((opr1 == "<=") && (opr2 == ">")) - t_msk = xt::where((q <= val1) | (q > val2), + { + t_msk = xt::where((q <= val1) || (q > val2), 1, t_msk); + } else if ((opr1 == "<") && (opr2 == ">=")) - t_msk = xt::where((q < val1) | (q >= val2), + { + t_msk = xt::where((q < val1) || (q >= val2), 1, t_msk); + } else if ((opr1 == "<=") && (opr2 == ">=")) - t_msk = xt::where((q <= val1) & (q >= val2), + { + t_msk = xt::where((q <= val1) && (q >= val2), 1, t_msk); + } if ((opr2 == "<") && (opr1 == ">")) - t_msk = xt::where((q < val2) | (q > val1), + { + t_msk = xt::where((q < val2) || (q > val1), 1, t_msk); + } else if ((opr2 == "<=") && (opr1 == ">")) - t_msk = xt::where((q <= val2) | (q > val1), + { + t_msk = xt::where((q <= val2) || (q > val1), 1, t_msk); + } else if ((opr2 == "<") && (opr1 == ">=")) - t_msk = xt::where((q < val2) | (q >= val1), + { + t_msk = xt::where((q < val2) || (q >= val1), 1, t_msk); + } else if ((opr2 == "<=") && (opr1 == ">=")) - t_msk = xt::where((q <= val2) | (q >= val1), + { + t_msk = xt::where((q <= val2) || (q >= val1), 1, t_msk); + } } else { @@ -320,40 +399,54 @@ namespace evalhyd // apply masking condition to given subset if (opr == "<") + { t_msk = xt::where( q 
< val, 1, t_msk ); + } else if (opr == ">") + { t_msk = xt::where( q > val, 1, t_msk ); + } else if (opr == "<=") + { t_msk = xt::where( q <= val, 1, t_msk ); + } else if (opr == ">=") + { t_msk = xt::where( q >= val, 1, t_msk ); + } else if (opr == "==") + { t_msk = xt::where( xt::equal(q, val), 1, t_msk ); + } else if (opr == "!=") + { t_msk = xt::where( xt::not_equal(q, val), 1, t_msk ); + } } } } - // condition on time index + // condition on time index else if (var == "t") { for (const auto & sequence : cond) { if (sequence.empty()) + { // i.e. t{:} xt::view(t_msk, xt::all()) = 1; + } else { // convert string indices to integer indices diff --git a/include/evalhyd/detail/maths.hpp b/include/evalhyd/detail/maths.hpp new file mode 100644 index 0000000000000000000000000000000000000000..49786ad67145a847f5b5faaf5b94b5d94ac80630 --- /dev/null +++ b/include/evalhyd/detail/maths.hpp @@ -0,0 +1,33 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_MATHS_HPP +#define EVALHYD_MATHS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xbuilder.hpp> +#include <xtensor/xutils.hpp> + +#include <cmath> + +namespace evalhyd +{ + namespace maths + { + // TODO: substitute with `xt::stddev` when performance fixed + // (see https://github.com/xtensor-stack/xtensor/pull/2656) + // function to calculate standard deviation on last axis of n-dim expressions + template <class A1, class A2> + inline auto nanstd(A1&& arr, A2&& mean_arr) + { + return xt::sqrt( + xt::nanmean(xt::square(xt::abs(arr - mean_arr)), -1) + ); + } + } +} + +#endif //EVALHYD_MATHS_HPP diff --git a/include/evalhyd/detail/probabilist/brier.hpp b/include/evalhyd/detail/probabilist/brier.hpp new file mode 100644 index 0000000000000000000000000000000000000000..387a773add7006c82078a2cd3ad1ce80c0b948a0 --- /dev/null +++ b/include/evalhyd/detail/probabilist/brier.hpp @@ -0,0 +1,1049 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_BRIER_HPP +#define EVALHYD_PROBABILIST_BRIER_HPP + +#include <limits> + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xindex_view.hpp> +#include <xtensor/xmasked_view.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xmath.hpp> + + +// NOTE ------------------------------------------------------------------------ +// All equations in metrics below are following notations from +// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences. +// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225". +// In particular, pp. 302-303, 332-333. 
+// ----------------------------------------------------------------------------- + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Determine observed realisation of threshold(s) exceedance. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Event observed outcome. + /// shape: (sites, thresholds, time) + template<class XD2a, class XD2b> + inline xt::xtensor<double, 3> calc_o_k( + const XD2a& q_obs, + const XD2b& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // observations above threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + else + { + // observations below threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + + } + + /// Determine mean observed realisation of threshold(s) exceedance. + /// + /// \param o_k + /// Event observed outcome. + /// shape: (sites, thresholds, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Mean event observed outcome. 
+ /// shape: (sites, lead times, subsets, samples, thresholds) + inline xt::xtensor<double, 5> calc_bar_o( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> bar_o = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr}); + + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), + xt::all(), xt::newaxis(), xt::all()), + xt::view(o_k, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), xt::all()), + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto o_k_masked_sampled = + xt::view(o_k_masked, xt::all(), xt::all(), + xt::all(), xt::all(), b_exp[e]); + + // compute mean "climatology" relative frequency of the event + // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$ + xt::view(bar_o, xt::all(), xt::all(), xt::all(), e, xt::all()) = + xt::nanmean(o_k_masked_sampled, -1); + } + + return bar_o; + } + + /// Determine number of forecast members exceeding threshold(s) + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (sites, lead times, members, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Number of forecast members exceeding threshold(s). 
+ /// shape: (sites, lead times, thresholds, time) + template<class XD4, class XD2> + inline xt::xtensor<double, 4> calc_sum_f_k( + const XD4& q_prd, + const XD2& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // determine if members are above threshold(s) + auto f_k = xt::view(q_prd, xt::all(), xt::all(), + xt::newaxis(), xt::all(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::newaxis(), + xt::all(), xt::newaxis(), xt::newaxis()); + + // calculate how many members are above threshold(s) + return xt::sum(f_k, 3); + } + else + { + // determine if members are below threshold(s) + auto f_k = xt::view(q_prd, xt::all(), xt::all(), + xt::newaxis(), xt::all(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::newaxis(), + xt::all(), xt::newaxis(), xt::newaxis()); + + // calculate how many members are below threshold(s) + return xt::sum(f_k, 3); + } + } + + /// Determine forecast probability of threshold(s) exceedance to occur. + /// + /// \param sum_f_k + /// Number of forecast members exceeding threshold(s). + /// shape: (sites, lead times, thresholds, time) + /// \param n_mbr + /// Number of ensemble members. + /// \return + /// Event probability forecast. + /// shape: (sites, lead times, thresholds, time) + inline xt::xtensor<double, 4> calc_y_k( + const xt::xtensor<double, 4>& sum_f_k, + std::size_t n_mbr + ) + { + // determine probability of threshold(s) exceedance + // /!\ probability calculation dividing by n (the number of + // members), not n+1 (the number of ranks) like in other metrics + return sum_f_k / n_mbr; + } + } + + namespace intermediate + { + /// Compute the Brier score for each time step. + /// + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param y_k + /// Event probability forecast. + /// shape: (sites, lead times, thresholds, time) + /// \return + /// Brier score for each threshold for each time step. 
+ /// shape: (sites, lead times, thresholds, time) + inline xt::xtensor<double, 4> calc_bs( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 4>& y_k + ) + { + // return computed Brier score(s) + // $bs = (o_k - y_k)^2$ + return xt::square( + xt::view(o_k, xt::all(), xt::newaxis(), + xt::all(), xt::all()) + - y_k + ); + } + } + + namespace metrics + { + /// Compute the Brier score (BS). + /// + /// \param bs + /// Brier score for each threshold for each time step. + /// shape: (sites, lead times, thresholds, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Brier score for each subset and for each threshold. 
+ /// shape: (sites, lead times, subsets, samples, thresholds) + template <class XD2> + inline xt::xtensor<double, 5> calc_BS( + const xt::xtensor<double, 4>& bs, + const XD2& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> BS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto bs_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, + xt::newaxis(), xt::all()), + bs, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto bs_masked_sampled = + xt::view(bs_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + + // calculate the mean over the time steps + // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$ + xt::view(BS, xt::all(), xt::all(), m, e, xt::all()) = + xt::nanmean(bs_masked_sampled, -1); + } + } + + // assign NaN where thresholds were not provided (i.e. set as NaN) + xt::masked_view( + BS, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::newaxis(), + xt::all())) + ) = NAN; + + return BS; + } + + /// Compute the X and Y axes of the reliability diagram + /// (`y_i`, the forecast probability; `bar_o_i`, the observed frequency;) + /// as well as the frequencies of the sampling histogram + /// (`N_i`, the number of forecasts of given probability `y_i`)'. + /// + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param y_k + /// Event probability forecast. 
+ /// shape: (sites, lead times, thresholds, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// X and Y axes of the reliability diagram, and ordinates + /// (i.e. frequencies) of the sampling histogram, in this order. + /// shape: (sites, lead times, subsets, samples, thresholds, bins, axes) + template <class XD2> + inline xt::xtensor<double, 7> calc_REL_DIAG( + const XD2& q_thr, + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 4>& y_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 7> REL_DIAG = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr, + n_mbr + 1, std::size_t {3}}); + + // compute range of forecast values $y_i$ + auto y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr; + + xt::view(REL_DIAG, xt::all(), xt::all(), xt::all(), xt::all(), + xt::all(), xt::all(), 0) = y_i; + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), + m, xt::newaxis(), xt::all()), + xt::view(o_k, xt::all(), xt::newaxis(), + xt::all(), xt::all()), + NAN + ); + auto y_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), + m, xt::newaxis(), xt::all()), + y_k, + NAN + 
); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto o_k_masked_sampled = + xt::view(o_k_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + auto y_k_masked_sampled = + xt::view(y_k_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), m, + xt::newaxis(), b_exp[e]); + + // compute mask to subsample time steps belonging to same forecast bin + // (where bins are defined as the range of forecast values) + auto msk_bins = xt::equal( + // force evaluation to avoid segfault + xt::view(xt::eval(y_k_masked_sampled), + xt::all(), xt::all(), xt::all(), + xt::newaxis(), xt::all()), + xt::view(y_i, + xt::newaxis(), xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis()) + ); + + // compute number of forecasts in each forecast bin $N_i$ + auto N_i = xt::eval(xt::sum(msk_bins, -1)); + + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), + xt::all(), 2) = N_i; + + // compute subsample relative frequency + // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ + auto bar_o_i = xt::where( + N_i > 0, + xt::nansum( + xt::where( + msk_bins, + xt::view(o_k_masked_sampled, + xt::all(), xt::all(), + xt::all(), xt::newaxis(), + xt::all()), + 0. + ), + -1 + ) / N_i, + 0. + ); + + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), + xt::all(), 1) = bar_o_i; + } + } + + // assign NaN where thresholds were not provided (i.e. set as NaN) + xt::masked_view( + REL_DIAG, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis(), + xt::newaxis())) + ) = NAN; + + return REL_DIAG; + } + + /// Compute the calibration-refinement decomposition of the Brier score + /// into reliability, resolution, and uncertainty. + /// + /// BS = reliability - resolution + uncertainty + /// + /// \param q_thr + /// Streamflow exceedance threshold(s). 
+ /// shape: (sites, thresholds) + /// \param bar_o + /// Mean event observed outcome. + /// shape: (sites, lead times, subsets, samples, thresholds) + /// \param REL_DIAG + /// Axes of the reliability diagram and the sampling histogram. + /// shape: (sites, lead times, thresholds, time) + /// \param t_counts + /// Time step counts for the period. + /// shape: (sites, lead times, subsets, samples) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Brier score components (reliability, resolution, uncertainty) + /// for each subset and for each threshold. + /// shape: (sites, lead times, subsets, samples, thresholds, bins, axes) + template <class XD2> + inline xt::xtensor<double, 6> calc_BS_CRD( + const XD2& q_thr, + const xt::xtensor<double, 5>& bar_o, + const xt::xtensor<double, 7>& REL_DIAG, + const xt::xtensor<double, 4>& t_counts, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 6> BS_CRD = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr, + std::size_t {3}}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // retrieve length of period + auto l = xt::view(t_counts, xt::all(), xt::all(), + m, xt::newaxis(), e); + + // retrieve range of forecast values $y_i$ + auto y_i = xt::eval( + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, + xt::all(), xt::all(), 0) + ); + + // retrieve number of forecasts in each forecast bin $N_i$ + auto N_i = xt::eval( + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, + xt::all(), xt::all(), 2) + ); + + // retrieve subsample relative frequency + // 
$\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ + auto bar_o_i = xt::eval( + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, + xt::all(), xt::all(), 1) + ); + + // retrieve mean event observed outcome $bar_o$ + auto _bar_o = xt::view(bar_o, xt::all(), xt::all(), + m, e, xt::all()); + // (reshape to insert size-one axis for "bins") + auto _bar_o_ = xt::view(_bar_o, xt::all(), xt::all(), + xt::all(), xt::newaxis()); + + // calculate reliability = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ + xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 0) = + xt::nansum( + xt::square(y_i - bar_o_i) * N_i, + -1 + ) / l; + + // calculate resolution = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ + xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 1) = + xt::nansum( + xt::square(bar_o_i - _bar_o_) * N_i, + -1 + ) / l; + + // calculate uncertainty = $\bar{o} (1 - \bar{o})$ + xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 2) = + _bar_o * (1 - _bar_o); + } + } + + // assign NaN where thresholds were not provided (i.e. set as NaN) + xt::masked_view( + BS_CRD, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis())) + ) = NAN; + + return BS_CRD; + } + + /// Compute the likelihood-base rate decomposition of the Brier score + /// into type 2 bias, discrimination, and sharpness (a.k.a. refinement). + /// + /// BS = type 2 bias - discrimination + sharpness + /// + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param y_k + /// Event probability forecast. + /// shape: (sites, lead times, thresholds, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param t_counts + /// Time step counts for the period. 
+ /// shape: (sites, lead times, subsets, samples) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Brier score components (type 2 bias, discrimination, sharpness) + /// for each subset and for each threshold. + /// shape: (sites, lead times, subsets, samples, thresholds, components) + template <class XD2> + inline xt::xtensor<double, 6> calc_BS_LBD( + const XD2& q_thr, + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 4>& y_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + const xt::xtensor<double, 4>& t_counts, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // declare internal variables + // shape: (sites, lead times, bins, thresholds, time) + xt::xtensor<double, 5> msk_bins; + // shape: (sites, lead times, thresholds) + xt::xtensor<double, 3> bar_y; + // shape: (sites, lead times, bins, thresholds) + xt::xtensor<double, 4> M_j, bar_y_j; + // shape: (bins,) + xt::xtensor<double, 1> o_j; + + // set the range of observed values $o_j$ + o_j = {0., 1.}; + + // declare and initialise output variable + xt::xtensor<double, 6> BS_LBD = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr, + std::size_t {3}}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), + m, xt::newaxis(), xt::all()), + xt::view(o_k, xt::all(), xt::newaxis(), + xt::all(), xt::all()), + NAN + ); + auto y_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), + m, xt::newaxis(), xt::all()), + y_k, + NAN + ); + + // compute variable one sample at 
a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto o_k_masked_sampled = + xt::view(o_k_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + auto y_k_masked_sampled = + xt::view(y_k_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + + // retrieve length of period + auto l = xt::view(t_counts, xt::all(), xt::all(), + m, xt::newaxis(), e); + + // compute mask to subsample time steps belonging to same observation bin + // (where bins are defined as the range of forecast values) + msk_bins = xt::equal( + // force evaluation to avoid segfault + xt::view(xt::eval(o_k_masked_sampled), + xt::all(), xt::all(), xt::newaxis(), + xt::all(), xt::all()), + xt::view(o_j, + xt::newaxis(), xt::newaxis(), xt::all(), + xt::newaxis(), xt::newaxis()) + ); + + // compute number of observations in each observation bin $M_j$ + M_j = xt::nansum(msk_bins, -1); + + // compute subsample relative frequency + // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ + bar_y_j = xt::where( + M_j > 0, + xt::nansum( + xt::where( + msk_bins, + xt::view(y_k_masked_sampled, + xt::all(), xt::all(), + xt::newaxis(), + xt::all(), xt::all()), + 0. + ), + -1 + ) / M_j, + 0. 
+ ); + + // compute mean "climatology" forecast probability + // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ + bar_y = xt::nanmean(y_k_masked_sampled, -1); + + // calculate type 2 bias = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ + xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 0) = + xt::nansum( + xt::square( + xt::view(o_j, xt::newaxis(), + xt::newaxis(), xt::all(), + xt::newaxis()) + - bar_y_j + ) * M_j, + 2 + ) / l; + + // calculate discrimination = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ + xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 1) = + xt::nansum( + xt::square( + bar_y_j + - xt::view(bar_y, + xt::all(), xt::all(), + xt::newaxis(), + xt::all()) + ) * M_j, + 2 + ) / l; + + // calculate sharpness = + // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ + xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 2) = + xt::nansum( + xt::square( + y_k_masked_sampled + - xt::view(bar_y, xt::all(), + xt::all(), xt::all(), + xt::newaxis()) + ), + -1 + ) / l; + } + + } + + // assign NaN where thresholds were not provided (i.e. set as NaN) + xt::masked_view( + BS_LBD, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis())) + ) = NAN; + + return BS_LBD; + } + + /// Compute the Brier skill score (BSS). + /// + /// \param bs + /// Brier score for each threshold for each time step. + /// shape: (sites, lead times, thresholds, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param bar_o + /// Mean event observed outcome. + /// shape: (sites, lead times, subsets, samples, thresholds) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. 
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_thr
            ///     Number of thresholds.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Brier skill score for each subset and for each threshold.
            ///     shape: (sites, lead times, subsets, samples, thresholds)
            template <class XD2>
            inline xt::xtensor<double, 5> calc_BSS(
                const xt::xtensor<double, 4>& bs,
                const XD2& q_thr,
                const xt::xtensor<double, 3>& o_k,
                const xt::xtensor<double, 5>& bar_o,
                const xt::xtensor<bool, 4>& t_msk,
                const std::vector<xt::xkeep_slice<int>>& b_exp,
                std::size_t n_sit,
                std::size_t n_ldt,
                std::size_t n_thr,
                std::size_t n_msk,
                std::size_t n_exp
            )
            {
                // declare and initialise output variable
                xt::xtensor<double, 5> BSS =
                    xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr});

                // compute variable one mask at a time to minimise memory imprint
                for (std::size_t m = 0; m < n_msk; m++)
                {
                    // apply the mask
                    // (using NaN workaround until reducers work on masked_view)
                    auto o_k_masked = xt::where(
                        xt::view(t_msk, xt::all(), xt::all(),
                                 m, xt::newaxis(), xt::all()),
                        xt::view(o_k, xt::all(), xt::newaxis(),
                                 xt::all(), xt::all()),
                        NAN
                    );
                    auto bs_masked = xt::where(
                        xt::view(t_msk, xt::all(), xt::all(), m,
                                 xt::newaxis(), xt::all()),
                        bs,
                        NAN
                    );

                    // compute variable one sample at a time
                    for (std::size_t e = 0; e < n_exp; e++)
                    {
                        // apply the bootstrap sampling
                        auto o_k_masked_sampled =
                            xt::view(o_k_masked, xt::all(), xt::all(),
                                     xt::all(), b_exp[e]);
                        auto bs_masked_sampled =
                            xt::view(bs_masked, xt::all(), xt::all(),
                                     xt::all(), b_exp[e]);
                        auto bar_o_sampled =
                            xt::view(bar_o, xt::all(), xt::all(),
                                     xt::all(), e, xt::all());

                        // calculate reference Brier score(s)
                        // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$
                        // (keep_dims keeps a trailing size-1 axis so that
                        //  bs_ref broadcasts against bs over the time axis)
                        xt::xtensor<double, 4> bs_ref =
                            xt::nanmean(
                                xt::square(
                                    o_k_masked_sampled -
                                    xt::view(
                                        bar_o_sampled, xt::all(),
                                        xt::all(), m, xt::all(),
                                        xt::newaxis()
                                    )
                                ),
                                -1,
                                xt::keep_dims
                            );

                        // compute Brier skill score(s)
                        // $BSS = \frac{1}{n} \sum_{k=1}^{n} (1 - \frac{bs_k}{bs_{ref}})$
                        // (set to negative infinity where the reference Brier
                        //  score is zero, since the ratio is then undefined)
                        xt::view(BSS, xt::all(), xt::all(), m, e, xt::all()) =
                            xt::nanmean(
                                xt::where(
                                    xt::equal(bs_ref, 0),
                                    - std::numeric_limits<double>::infinity(),
                                    1 - (bs_masked_sampled / bs_ref)
                                ),
                                -1
                            );
                    }
                }

                // assign NaN where thresholds were not provided (i.e. set as NaN)
                xt::masked_view(
                    BSS,
                    xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
                                       xt::newaxis(), xt::newaxis(),
                                       xt::all()))
                ) = NAN;

                return BSS;
            }

            /// Compute the continuous rank probability score based on the
            /// integration over 101 Brier scores (CRPS_FROM_BS), i.e. using the
            /// observed minimum, the 99 observed percentiles, and the observed
            /// maximum as the exceedance thresholds.
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param q_prd
            ///     Streamflow predictions.
            ///     shape: (sites, lead times, members, time)
            /// \param is_high_flow_event
            ///     Whether events correspond to being above the threshold(s).
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_mbr
            ///     Number of ensemble members.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     CRPS for each subset and for each sample.
            ///     shape: (sites, lead times, subsets, samples)
            template <class XD2, class XD4>
            inline xt::xtensor<double, 4> calc_CRPS_FROM_BS(
                const XD2& q_obs,
                const XD4& q_prd,
                bool is_high_flow_event,
                const xt::xtensor<bool, 4>& t_msk,
                const std::vector<xt::xkeep_slice<int>>& b_exp,
                std::size_t n_sit,
                std::size_t n_ldt,
                std::size_t n_mbr,
                std::size_t n_msk,
                std::size_t n_exp
            )
            {
                // declare and initialise output variable
                xt::xtensor<double, 4> CRPS_FROM_BS =
                    xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});

                // compute variable one mask at a time to minimise memory imprint
                for (std::size_t m = 0; m < n_msk; m++)
                {
                    // apply the mask
                    // (using NaN workaround until reducers work on masked_view)
                    auto q_obs_masked = xt::where(
                        xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
                        xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
                        NAN
                    );
                    auto q_prd_masked = xt::where(
                        xt::view(t_msk, xt::all(), xt::all(), m,
                                 xt::newaxis(), xt::all()),
                        q_prd,
                        NAN
                    );

                    // compute variable one sample at a time
                    for (std::size_t e = 0; e < n_exp; e++)
                    {
                        for (std::size_t l = 0; l < n_ldt; l++)
                        {
                            // compute the 101 empirical thresholds, i.e. the
                            // observed minimum, the 99 observed percentiles,
                            // and the observed maximum
                            xt::xtensor<double, 2> thr =
                                xt::zeros<double>({n_sit, std::size_t {101}});

                            // /!\ need to compute quantiles one site at a time
                            //     because there is no `xt::nanquantile`, so
                            //     need to filter NaN before computing quantiles
                            for (std::size_t s = 0; s < n_sit; s++)
                            {
                                auto obs = xt::view(q_obs_masked, s, l, b_exp[e]);

                                auto obs_filtered = xt::filter(
                                    obs, !xt::isnan(obs)
                                );

                                if (obs_filtered.size() > 0)
                                {
                                    xt::view(thr, s, xt::all()) = xt::quantile(
                                        obs_filtered,
                                        xt::arange<double>(0.00, 1.01, 0.01)
                                    );
                                }
                                else
                                {
                                    xt::view(thr, s, xt::all()) = NAN;
                                }
                            }

                            // compute observed and predicted event outcomes
                            auto o_k = elements::calc_o_k(
                                xt::view(q_obs_masked, xt::all(), l,
                                         xt::all()),
                                thr, is_high_flow_event
                            );

                            auto y_k =
elements::calc_y_k( + elements::calc_sum_f_k( + xt::view(q_prd_masked, xt::all(), l, + xt::newaxis(), xt::all(), + xt::all()), + thr, is_high_flow_event + ), + n_mbr + ); + + // compute 99 Brier scores + auto bs = intermediate::calc_bs(o_k, y_k); + + auto bs_masked = xt::where( + xt::view(t_msk, xt::all(), l, xt::newaxis(), + m, xt::newaxis(), xt::all()), + bs, + NAN + ); + + auto bs_masked_sampled = xt::view( + bs_masked, xt::all(), xt::all(), xt::all(), + b_exp[e] + ); + + auto BS = xt::nanmean(bs_masked_sampled, -1); + + // compute CRPS from integration over 99 Brier scores + xt::view(CRPS_FROM_BS, xt::all(), l, m, e) = + // xt::trapz(y, x, axis=1) + xt::trapz( + xt::view(BS, xt::all(), 0, xt::all()), + thr, + 1 + ); + } + } + } + + return CRPS_FROM_BS; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_BRIER_HPP diff --git a/include/evalhyd/detail/probabilist/cdf.hpp b/include/evalhyd/detail/probabilist/cdf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e0cf0b146c8a9d65554bc43e416075c1b4d83c1a --- /dev/null +++ b/include/evalhyd/detail/probabilist/cdf.hpp @@ -0,0 +1,201 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_CDF_HPP +#define EVALHYD_PROBABILIST_CDF_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xmath.hpp> + + +namespace evalhyd +{ + namespace probabilist + { + namespace intermediate + { + /// Compute the CRPS for each time step as the distance between the + /// observed and empirical (i.e. constructed from the ensemble + /// predictions) quadratic cumulative density functions (CDFs). + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_qnt + /// Streamflow prediction quantiles. + /// shape: (sites, lead times, quantiles, time) + /// \param n_sit + /// Number of sites. 
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_mbr
            ///     Number of ensemble members.
            /// \param n_tim
            ///     Number of time steps.
            /// \return
            ///     CRPS for each time step.
            ///     shape: (sites, lead times, time)
            template <class XD2>
            inline xt::xtensor<double, 3> calc_crps_from_ecdf(
                const XD2& q_obs,
                const xt::xtensor<double, 4>& q_qnt,
                std::size_t n_sit,
                std::size_t n_ldt,
                std::size_t n_mbr,
                std::size_t n_tim
            )
            {
                // notations below follow Hersbach (2000)
                // https://doi.org/10.1175/1520-0434(2000)015<0559:DOTCRP>2.0.CO;2

                // declare and initialise internal variables
                // (alpha_i / beta_i are the per-bin widths of the Hersbach
                //  decomposition, weighted below by p_i^2 and (1 - p_i)^2)
                xt::xtensor<double, 4> alpha_i =
                    xt::zeros<double>({n_mbr + 1, n_sit, n_ldt, n_tim});

                xt::xtensor<double, 4> beta_i =
                    xt::zeros<double>({n_mbr + 1, n_sit, n_ldt, n_tim});

                // case x_a < x_1
                // i.e. observation is an outlier before predictive range
                auto x_a = xt::view(q_obs, xt::all(), xt::newaxis(), xt::all());
                auto x_1 = xt::view(q_qnt, xt::all(), xt::all(), 0, xt::all());

                auto is_before = x_a < x_1;
                xt::view(beta_i, 0, xt::all()) = xt::where(
                    is_before, x_1 - x_a, xt::view(beta_i, 0)
                );

                // interior bins: between consecutive ensemble members
                for (std::size_t m = 0; m < n_mbr - 1; m++)
                {
                    auto x_i = xt::view(q_qnt, xt::all(), xt::all(), m, xt::all());
                    auto x_ip1 = xt::view(q_qnt, xt::all(), xt::all(), m + 1, xt::all());

                    // case x_a < x_i
                    // i.e. observation below given member
                    auto is_below = x_a < x_i;
                    xt::view(beta_i, m + 1, xt::all()) = xt::where(
                        is_below, x_ip1 - x_i, xt::view(beta_i, m + 1)
                    );

                    // case x_i <= x_a <= x_{i+1}
                    // i.e. observation between given member and next member
                    auto is_between = (x_i <= x_a) && (x_a <= x_ip1);
                    xt::view(alpha_i, m + 1, xt::all()) = xt::where(
                        is_between, x_a - x_i, xt::view(alpha_i, m + 1)
                    );
                    xt::view(beta_i, m + 1, xt::all()) = xt::where(
                        is_between, x_ip1 - x_a, xt::view(beta_i, m + 1)
                    );

                    // case x_a > x_{i+1}
                    // i.e. observation above next member
                    auto is_above = x_a > x_ip1;
                    xt::view(alpha_i, m + 1, xt::all()) = xt::where(
                        is_above, x_ip1 - x_i, xt::view(alpha_i, m + 1)
                    );
                }

                // case x_a > x_N
                // i.e. observation is an outlier beyond predictive range
                auto x_N = xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all());

                auto is_beyond = x_a > x_N;
                xt::view(alpha_i, n_mbr, xt::all()) = xt::where(
                    is_beyond, x_a - x_N, xt::view(alpha_i, n_mbr)
                );

                // compute crps as difference between the quadratic CDFs
                // (p_i = i / n_mbr is the empirical CDF value for bin i)
                auto p_i = xt::eval(
                    xt::view(
                        xt::arange<double>(n_mbr + 1) / n_mbr,
                        xt::all(), xt::newaxis(), xt::newaxis(),
                        xt::newaxis()
                    )
                );

                auto crps_from_ecdf = xt::sum(
                    (alpha_i * xt::square(p_i))
                    + (beta_i * xt::square(1 - p_i)),
                    0
                );

                return crps_from_ecdf;
            }
        }

        namespace metrics
        {
            /// Compute the continuous rank probability score based on the
            /// integration over the quadratic difference between the observed
            /// and empirical cumulative density functions (CRPS_FROM_ECDF).
            ///
            /// \param crps_from_ecdf
            ///     CRPS for each time step.
            ///     shape: (sites, lead times, time)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     CRPS.
+ /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_CRPS_FROM_ECDF( + const xt::xtensor<double, 3>& crps_from_ecdf, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> CRPS_FROM_ECDF = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto crps_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + crps_from_ecdf, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto crps_masked_sampled = xt::view( + crps_masked, xt::all(), xt::all(), b_exp[e] + ); + + // calculate the mean over the time steps + xt::view(CRPS_FROM_ECDF, xt::all(), xt::all(), m, e) = + xt::nanmean(crps_masked_sampled, -1); + } + } + + return CRPS_FROM_ECDF; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_CDF_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/probabilist/contingency.hpp b/include/evalhyd/detail/probabilist/contingency.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c3aa0ecd0fd669558eb7b69d0e602aca2ac6fad --- /dev/null +++ b/include/evalhyd/detail/probabilist/contingency.hpp @@ -0,0 +1,544 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_PROBABILIST_CONTINGENCY_HPP +#define EVALHYD_PROBABILIST_CONTINGENCY_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xmasked_view.hpp> +#include <xtensor/xmath.hpp> + + +// NOTE ------------------------------------------------------------------------ +// All equations in metrics below are following notations from +// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences. +// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225". +// In particular, pp. 302-303, 332-333. +// ----------------------------------------------------------------------------- + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + // Contingency table: + // + // OBS + // Y N + // +-----+-----+ a: hits + // Y | a | b | b: false alarms + // PRD +-----+-----+ c: misses + // N | c | d | d: correct rejections + // +-----+-----+ + // + + /// Determine alerts based on forecast. + /// + /// \param sum_f_k + /// Number of forecast members exceeding threshold(s). + /// shape: (sites, lead times, thresholds, time) + /// \param n_mbr + /// Number of ensemble members. + /// \return + /// Alerts based on forecast. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_a_k( + const xt::xtensor<double, 4>& sum_f_k, + std::size_t n_mbr + ) + { + // compute range of alert levels $alert_lvl$ + // (i.e. number of members that must forecast event + // for alert to be raised) + auto alert_lvl = xt::arange<double>(double(n_mbr + 1)); + + // determine whether forecast yield alert + return xt::view(sum_f_k, xt::all(), xt::all(), xt::newaxis(), + xt::all(), xt::all()) + >= xt::view(alert_lvl, xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis(), xt::newaxis()); + } + + /// Determine hits ('a' in contingency table). + /// + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param a_k + /// Alerts based on forecast. 
+ /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Hits. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_ct_a( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 5>& a_k + ) + { + return xt::equal(xt::view(o_k, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), xt::all()), + 1.) + && xt::equal(a_k, 1.); + } + + /// Determine false alarms ('b' in contingency table). + /// + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param a_k + /// Alerts based on forecast. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// False alarms. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_ct_b( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 5>& a_k + ) + { + return xt::equal(xt::view(o_k, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), xt::all()), + 0.) + && xt::equal(a_k, 1.); + } + + /// Determine misses ('c' in contingency table). + /// + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param a_k + /// Alerts based on forecast. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Misses. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_ct_c( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 5>& a_k + ) + { + return xt::equal(xt::view(o_k, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), xt::all()), + 1.) + && xt::equal(a_k, 0.); + } + + /// Determine correct rejections ('d' in contingency table). + /// + /// \param o_k + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param a_k + /// Alerts based on forecast. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Correct rejections. 
+ /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_ct_d( + const xt::xtensor<double, 3>& o_k, + const xt::xtensor<double, 5>& a_k + ) + { + return xt::equal(xt::view(o_k, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), xt::all()), + 0.) + && xt::equal(a_k, 0.); + } + } + + namespace intermediate + { + /// Compute the probability of detection for each time step. + /// + /// \param ct_a + /// Hits. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param ct_c + /// Misses. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Probability of detection for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_pod( + const xt::xtensor<double, 5>& ct_a, + const xt::xtensor<double, 5>& ct_c + ) + { + return ct_a / (ct_a + ct_c); + } + + /// Compute the probability of false detection for each time step. + /// + /// \param ct_b + /// False alarms. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param ct_d + /// Correct rejections. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Probability of false detection for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_pofd( + const xt::xtensor<double, 5>& ct_b, + const xt::xtensor<double, 5>& ct_d + ) + { + return ct_b / (ct_b + ct_d); + } + + /// Compute the false alarm ratio for each time step. + /// + /// \param ct_a + /// Hits. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param ct_b + /// False alarms. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// False alarm ratio for each time step. 
+ /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_far( + const xt::xtensor<double, 5>& ct_a, + const xt::xtensor<double, 5>& ct_b + ) + { + return ct_b / (ct_a + ct_b); + } + + /// Compute the critical success index for each time step. + /// + /// \param ct_a + /// Hits. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param ct_b + /// False alarms. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param ct_c + /// Misses. + /// shape: (sites, lead times, levels, thresholds, time) + /// \return + /// Critical success index for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + inline xt::xtensor<double, 5> calc_csi( + const xt::xtensor<double, 5>& ct_a, + const xt::xtensor<double, 5>& ct_b, + const xt::xtensor<double, 5>& ct_c + ) + { + return ct_a / (ct_a + ct_b + ct_c); + } + } + + namespace metrics + { + namespace detail + { + template <class XD2> + inline xt::xtensor<double, 6> calc_METRIC_from_metric( + const xt::xtensor<double, 5>& metric, + const XD2& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 6> METRIC = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, + n_mbr + 1, n_thr}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto metric_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, + xt::newaxis(), xt::newaxis(), + xt::all()), + metric, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto metric_masked_sampled = + xt::view(metric_masked, xt::all(), xt::all(), + 
xt::all(), xt::all(), b_exp[e]);

                        // calculate the mean over the time steps
                        xt::view(METRIC, xt::all(), xt::all(), m, e,
                                 xt::all(), xt::all()) =
                            xt::nanmean(metric_masked_sampled, -1);
                    }
                }

                // assign NaN where thresholds were not provided (i.e. set as NaN)
                xt::masked_view(
                    METRIC,
                    xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
                                       xt::newaxis(), xt::newaxis(),
                                       xt::newaxis(), xt::all()))
                ) = NAN;

                return METRIC;
            }
        }

        /// Compute the probability of detection (POD),
        /// also known as 'hit rate'.
        ///
        /// \param pod
        ///     Probability of detection for each time step.
        ///     shape: (sites, lead times, levels, thresholds, time)
        /// \param q_thr
        ///     Streamflow exceedance threshold(s).
        ///     shape: (sites, thresholds)
        /// \param t_msk
        ///     Temporal subsets of the whole record.
        ///     shape: (sites, lead times, subsets, time)
        /// \param b_exp
        ///     Bootstrap samples.
        ///     shape: (samples, time slice)
        /// \param n_sit
        ///     Number of sites.
        /// \param n_ldt
        ///     Number of lead times.
        /// \param n_thr
        ///     Number of thresholds.
        /// \param n_mbr
        ///     Number of ensemble members.
        /// \param n_msk
        ///     Number of temporal subsets.
        /// \param n_exp
        ///     Number of bootstrap samples.
        /// \return
        ///     Probabilities of detection.
        ///     shape: (sites, lead times, subsets, samples, levels, thresholds)
        template <class XD2>
        inline xt::xtensor<double, 6> calc_POD(
            const xt::xtensor<double, 5>& pod,
            const XD2& q_thr,
            const xt::xtensor<bool, 4>& t_msk,
            const std::vector<xt::xkeep_slice<int>>& b_exp,
            std::size_t n_sit,
            std::size_t n_ldt,
            std::size_t n_thr,
            std::size_t n_mbr,
            std::size_t n_msk,
            std::size_t n_exp
        )
        {
            return detail::calc_METRIC_from_metric(
                pod, q_thr, t_msk, b_exp,
                n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp
            );
        }

        /// Compute the probability of false detection (POFD),
        /// also known as 'false alarm rate'.
+ /// + /// \param pofd + /// Probability of false detection for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Probabilities of false detection. + /// shape: (sites, lead times, subsets, samples, levels, thresholds) + template <class XD2> + inline xt::xtensor<double, 6> calc_POFD( + const xt::xtensor<double, 5>& pofd, + const XD2& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + return detail::calc_METRIC_from_metric( + pofd, q_thr, t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + + /// Compute the false alarm ratio (FAR). + /// + /// \param far + /// False alarm ratio for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_mbr + /// Number of ensemble members. 
+ /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// False alarm ratios. + /// shape: (sites, lead times, subsets, samples, levels, thresholds) + template <class XD2> + inline xt::xtensor<double, 6> calc_FAR( + const xt::xtensor<double, 5>& far, + const XD2& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + return detail::calc_METRIC_from_metric( + far, q_thr, t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + + /// Compute the critical success index (CSI). + /// + /// \param csi + /// Critical success index for each time step. + /// shape: (sites, lead times, levels, thresholds, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Critical success indices. 
+ /// shape: (sites, lead times, subsets, samples, levels, thresholds) + template <class XD2> + inline xt::xtensor<double, 6> calc_CSI( + const xt::xtensor<double, 5>& csi, + const XD2& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_thr, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + return detail::calc_METRIC_from_metric( + csi, q_thr, t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + + /// Compute the relative operating characteristic skill score (ROCSS). + /// + /// \param POD + /// Probabilities of detection. + /// shape: (sites, lead times, subsets, samples, levels, thresholds) + /// \param POFD + /// Probabilities of false detection. + /// shape: (sites, lead times, subsets, samples, levels, thresholds) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (sites, thresholds) + /// \return + /// ROC skill scores. + /// shape: (sites, lead times, subsets, samples, thresholds) + template <class XD2> + inline xt::xtensor<double, 5> calc_ROCSS( + const xt::xtensor<double, 6>& POD, + const xt::xtensor<double, 6>& POFD, + const XD2& q_thr + ) + { + // compute the area under the ROC curve + // xt::trapz(y, x, axis=4) + // (note: taking the opposite of the integration results + // because POD/POFD values are in decreasing order) + auto A = - xt::trapz(POD, POFD, 4); + + // compute the ROC skill score + // $SS_{ROC} = \frac{A - A_{random}}{A_{perfect} - A_{random}}$ + // $SS_{ROC} = \frac{A - 0.5}{1. - 0.5} = 2A - 1$ + auto ROCSS = xt::eval((2. * A) - 1.); + + // assign NaN where thresholds were not provided (i.e. 
set as NaN) + xt::masked_view( + ROCSS, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::newaxis(), + xt::all())) + ) = NAN; + + return ROCSS; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_CONTINGENCY_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/probabilist/diagnostics.hpp b/include/evalhyd/detail/probabilist/diagnostics.hpp new file mode 100644 index 0000000000000000000000000000000000000000..84ee988fb1f5e577dd1f73fa6c76b2beb04b32db --- /dev/null +++ b/include/evalhyd/detail/probabilist/diagnostics.hpp @@ -0,0 +1,64 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_DIAGNOSTICS_HPP +#define EVALHYD_PROBABILIST_DIAGNOSTICS_HPP + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Counts the number of time steps available in given period. + /// + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Time step counts. 
+ /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_t_counts( + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> t_counts = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), xt::all(), b_exp[e]); + + // calculate the mean over the time steps + xt::view(t_counts, xt::all(), xt::all(), xt::all(), e) = + xt::sum(t_msk_sampled, -1); + } + + return t_counts; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_DIAGNOSTICS_HPP diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6293ea29e546df8f9f3756608d213aa4e5773a9e --- /dev/null +++ b/include/evalhyd/detail/probabilist/evaluator.hpp @@ -0,0 +1,856 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_PROBABILIST_EVALUATOR_HPP +#define EVALHYD_PROBABILIST_EVALUATOR_HPP + +#include <stdexcept> +#include <vector> + +#include <xtl/xoptional.hpp> +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> + +#include "diagnostics.hpp" +#include "brier.hpp" +#include "cdf.hpp" +#include "quantiles.hpp" +#include "contingency.hpp" +#include "ranks.hpp" +#include "intervals.hpp" +#include "multivariate.hpp" + + +namespace evalhyd +{ + namespace probabilist + { + template <class XD2, class XD4, class XB4> + class Evaluator + { + private: + // members for input data + const XD2& q_obs; + const XD4& q_prd; + // members for optional input data + const XD2& _q_thr; + const xt::xtensor<double, 1>& _c_lvl; + xtl::xoptional<const std::string, bool> _events; + xt::xtensor<bool, 4> t_msk; + const std::vector<xt::xkeep_slice<int>>& b_exp; + + // member for "reproducible randomness" + const long int random_seed; + + // members for dimensions + std::size_t n_sit; + std::size_t n_ldt; + std::size_t n_tim; + std::size_t n_msk; + std::size_t n_mbr; + std::size_t n_thr; + std::size_t n_itv; + std::size_t n_exp; + + // members for computational elements + // > Diagnostics + xtl::xoptional<xt::xtensor<double, 4>, bool> t_counts; + // > Brier-based + xtl::xoptional<xt::xtensor<double, 3>, bool> o_k; + xtl::xoptional<xt::xtensor<double, 5>, bool> bar_o; + xtl::xoptional<xt::xtensor<double, 4>, bool> sum_f_k; + xtl::xoptional<xt::xtensor<double, 4>, bool> y_k; + // > Quantiles-based + xtl::xoptional<xt::xtensor<double, 4>, bool> q_qnt; + // > Contingency table-based + xtl::xoptional<xt::xtensor<double, 5>, bool> a_k; + xtl::xoptional<xt::xtensor<double, 5>, bool> ct_a; + xtl::xoptional<xt::xtensor<double, 5>, bool> ct_b; + xtl::xoptional<xt::xtensor<double, 5>, bool> ct_c; + xtl::xoptional<xt::xtensor<double, 5>, bool> ct_d; + // > Ranks-based + xtl::xoptional<xt::xtensor<double, 3>, bool> r_k; + xtl::xoptional<xt::xtensor<double, 5>, bool> o_j; + // > Intervals-based + 
xtl::xoptional<xt::xtensor<double, 5>, bool> itv_bnds; + xtl::xoptional<xt::xtensor<double, 4>, bool> obs_in_itv; + xtl::xoptional<xt::xtensor<double, 4>, bool> itv_width; + xtl::xoptional<xt::xtensor<double, 6>, bool> clim_bnds; + + // members for intermediate evaluation metrics + // (i.e. before the reduction along the temporal axis) + // > Brier-based + xtl::xoptional<xt::xtensor<double, 4>, bool> bs; + // > CDF-based + xtl::xoptional<xt::xtensor<double, 3>, bool> crps_from_ecdf; + // > Quantiles-based + xtl::xoptional<xt::xtensor<double, 4>, bool> qs; + xtl::xoptional<xt::xtensor<double, 3>, bool> crps_from_qs; + // > Contingency table-based + xtl::xoptional<xt::xtensor<double, 5>, bool> pod; + xtl::xoptional<xt::xtensor<double, 5>, bool> pofd; + xtl::xoptional<xt::xtensor<double, 5>, bool> far; + xtl::xoptional<xt::xtensor<double, 5>, bool> csi; + // > Intervals-based + xtl::xoptional<xt::xtensor<double, 4>, bool> ws; + // > Multi-variate + xtl::xoptional<xt::xtensor<double, 2>, bool> es; + + // members for evaluation metrics + // > Brier-based + xtl::xoptional<xt::xtensor<double, 5>, bool> BS; + xtl::xoptional<xt::xtensor<double, 7>, bool> REL_DIAG; + xtl::xoptional<xt::xtensor<double, 6>, bool> BS_CRD; + xtl::xoptional<xt::xtensor<double, 6>, bool> BS_LBD; + xtl::xoptional<xt::xtensor<double, 5>, bool> BSS; + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_BS; + // > CDF-based + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_ECDF; + // > Quantiles-based + xtl::xoptional<xt::xtensor<double, 5>, bool> QS; + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_QS; + // > Contingency table-based + xtl::xoptional<xt::xtensor<double, 6>, bool> POD; + xtl::xoptional<xt::xtensor<double, 6>, bool> POFD; + xtl::xoptional<xt::xtensor<double, 6>, bool> FAR; + xtl::xoptional<xt::xtensor<double, 6>, bool> CSI; + xtl::xoptional<xt::xtensor<double, 5>, bool> ROCSS; + // > Ranks-based + xtl::xoptional<xt::xtensor<double, 5>, bool> RANK_HIST; + 
xtl::xoptional<xt::xtensor<double, 4>, bool> DS; + xtl::xoptional<xt::xtensor<double, 4>, bool> AS; + // > Intervals-based + xtl::xoptional<xt::xtensor<double, 5>, bool> CR; + xtl::xoptional<xt::xtensor<double, 5>, bool> AW; + xtl::xoptional<xt::xtensor<double, 5>, bool> AWN; + xtl::xoptional<xt::xtensor<double, 5>, bool> AWI; + xtl::xoptional<xt::xtensor<double, 5>, bool> WS; + xtl::xoptional<xt::xtensor<double, 5>, bool> WSS; + // > Multi-variate + xtl::xoptional<xt::xtensor<double, 4>, bool> ES; + + // methods to get optional parameters + auto get_q_thr() + { + if (_q_thr.size() < 1) + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *q_thr* not provided" + ); + } + else{ + return _q_thr; + } + } + + auto get_c_lvl() + { + if (_c_lvl.size() < 1) + { + throw std::runtime_error( + "interval-based metric requested, " + "but *c_lvl* not provided" + ); + } + else{ + return _c_lvl; + } + } + + bool is_high_flow_event() + { + if (_events.has_value()) + { + if (_events.value() == "high") + { + return true; + } + else if (_events.value() == "low") + { + return false; + } + else + { + throw std::runtime_error( + "invalid value for *events*: " + _events.value() + ); + } + } + else + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *events* not provided" + ); + } + } + + // methods to compute elements + xt::xtensor<double, 4> get_t_counts() + { + if (!t_counts.has_value()) + { + t_counts = elements::calc_t_counts( + t_msk, b_exp, n_sit, n_ldt, n_msk, n_exp + ); + } + return t_counts.value(); + }; + + xt::xtensor<double, 3> get_o_k() + { + if (!o_k.has_value()) + { + o_k = elements::calc_o_k( + q_obs, get_q_thr(), is_high_flow_event() + ); + } + return o_k.value(); + }; + + xt::xtensor<double, 5> get_bar_o() + { + if (!bar_o.has_value()) + { + bar_o = elements::calc_bar_o( + get_o_k(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_msk, n_exp + ); + } + return bar_o.value(); + }; + + xt::xtensor<double, 4> get_sum_f_k() + { + if 
(!sum_f_k.has_value()) + { + sum_f_k = elements::calc_sum_f_k( + q_prd, get_q_thr(), is_high_flow_event() + ); + } + return sum_f_k.value(); + }; + + xt::xtensor<double, 4> get_y_k() + { + if (!y_k.has_value()) + { + y_k = elements::calc_y_k( + get_sum_f_k(), n_mbr + ); + } + return y_k.value(); + }; + + xt::xtensor<double, 4> get_q_qnt() + { + if (!q_qnt.has_value()) + { + q_qnt = elements::calc_q_qnt( + q_prd + ); + } + return q_qnt.value(); + }; + + xt::xtensor<double, 5> get_a_k() + { + if (!a_k.has_value()) + { + a_k = elements::calc_a_k( + get_sum_f_k(), n_mbr + ); + } + return a_k.value(); + }; + + xt::xtensor<double, 5> get_ct_a() + { + if (!ct_a.has_value()) + { + ct_a = elements::calc_ct_a( + get_o_k(), get_a_k() + ); + } + return ct_a.value(); + }; + + xt::xtensor<double, 5> get_ct_b() + { + if (!ct_b.has_value()) + { + ct_b = elements::calc_ct_b( + get_o_k(), get_a_k() + ); + } + return ct_b.value(); + }; + + xt::xtensor<double, 5> get_ct_c() + { + if (!ct_c.has_value()) + { + ct_c = elements::calc_ct_c( + get_o_k(), get_a_k() + ); + } + return ct_c.value(); + }; + + xt::xtensor<double, 5> get_ct_d() + { + if (!ct_d.has_value()) + { + ct_d = elements::calc_ct_d( + get_o_k(), get_a_k() + ); + } + return ct_d.value(); + }; + + xt::xtensor<double, 3> get_r_k() + { + if (!r_k.has_value()) + { + r_k = elements::calc_r_k( + q_obs, get_q_qnt(), n_mbr, random_seed + ); + } + return r_k.value(); + }; + + xt::xtensor<double, 5> get_o_j() + { + if (!o_j.has_value()) + { + o_j = elements::calc_o_j( + get_r_k(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return o_j.value(); + }; + + xt::xtensor<double, 5> get_itv_bnds() + { + if (!itv_bnds.has_value()) + { + itv_bnds = elements::calc_itv_bnds( + q_prd, get_c_lvl(), + n_sit, n_ldt, n_itv, n_tim + ); + } + return itv_bnds.value(); + }; + + xt::xtensor<double, 4> get_obs_in_itv() + { + if (!obs_in_itv.has_value()) + { + obs_in_itv = elements::calc_obs_in_itv( + q_obs, get_itv_bnds() + ); + } + return 
obs_in_itv.value(); + }; + + xt::xtensor<double, 4> get_itv_width() + { + if (!itv_width.has_value()) + { + itv_width = elements::calc_itv_width( + get_itv_bnds() + ); + } + return itv_width.value(); + }; + + + xt::xtensor<double, 6> get_clim_bnds() + { + if (!clim_bnds.has_value()) + { + clim_bnds = elements::calc_clim_bnds( + q_obs, get_c_lvl(), t_msk, b_exp, + n_sit, n_ldt, n_itv, n_msk, n_exp + ); + } + return clim_bnds.value(); + }; + + // methods to compute intermediate metrics + xt::xtensor<double, 4> get_bs() + { + if (!bs.has_value()) + { + bs = intermediate::calc_bs( + get_o_k(), get_y_k() + ); + } + return bs.value(); + }; + + xt::xtensor<double, 3> get_crps_from_ecdf() + { + if (!crps_from_ecdf.has_value()) + { + crps_from_ecdf = intermediate::calc_crps_from_ecdf( + q_obs, get_q_qnt(), n_sit, n_ldt, n_mbr, n_tim + ); + } + return crps_from_ecdf.value(); + }; + + xt::xtensor<double, 4> get_qs() + { + if (!qs.has_value()) + { + qs = intermediate::calc_qs( + q_obs, get_q_qnt(), n_mbr + ); + } + return qs.value(); + }; + + xt::xtensor<double, 3> get_crps_from_qs() + { + if (!crps_from_qs.has_value()) + { + crps_from_qs = intermediate::calc_crps_from_qs( + get_qs(), n_mbr + ); + } + return crps_from_qs.value(); + }; + + xt::xtensor<double, 5> get_pod() + { + if (!pod.has_value()) + { + pod = intermediate::calc_pod( + get_ct_a(), get_ct_c() + ); + } + return pod.value(); + }; + + xt::xtensor<double, 5> get_pofd() + { + if (!pofd.has_value()) + { + pofd = intermediate::calc_pofd( + get_ct_b(), get_ct_d() + ); + } + return pofd.value(); + }; + + xt::xtensor<double, 5> get_far() + { + if (!far.has_value()) + { + far = intermediate::calc_far( + get_ct_a(), get_ct_b() + ); + } + return far.value(); + }; + + xt::xtensor<double, 5> get_csi() + { + if (!csi.has_value()) + { + csi = intermediate::calc_csi( + get_ct_a(), get_ct_b(), get_ct_c() + ); + } + return csi.value(); + }; + + xt::xtensor<double, 4> get_ws() + { + if (!ws.has_value()) + { + ws = 
intermediate::calc_ws( + q_obs, get_c_lvl(), get_itv_bnds() + ); + } + return ws.value(); + }; + + xt::xtensor<double, 2> get_es() + { + if (!es.has_value()) + { + es = intermediate::calc_es( + q_obs, q_prd, n_ldt, n_mbr, n_tim + ); + } + return es.value(); + }; + + public: + // constructor method + Evaluator(const XD2& obs, + const XD4& prd, + const XD2& thr, + const xt::xtensor<double, 1>& lvl, + xtl::xoptional<const std::string&, bool> events, + const XB4& msk, + const std::vector<xt::xkeep_slice<int>>& exp, + const long int seed) : + q_obs{obs}, q_prd{prd}, + _q_thr{thr}, _c_lvl{lvl}, _events{events}, + t_msk(msk), b_exp(exp), + random_seed{seed} + { + // initialise a mask if none provided + // (corresponding to no temporal subset) + if (msk.size() < 1) + { + t_msk = xt::ones<bool>( + {q_prd.shape(0), q_prd.shape(1), + std::size_t {1}, q_prd.shape(3)} + ); + } + + // determine size for recurring dimensions + n_sit = q_prd.shape(0); + n_ldt = q_prd.shape(1); + n_mbr = q_prd.shape(2); + n_tim = q_prd.shape(3); + n_msk = t_msk.shape(2); + n_thr = _q_thr.shape(1); + n_itv = _c_lvl.size(); + n_exp = b_exp.size(); + + // drop time steps where observations and/or predictions are NaN + for (std::size_t s = 0; s < n_sit; s++) + { + for (std::size_t l = 0; l < n_ldt; l++) + { + auto obs_nan = + xt::isnan(xt::view(q_obs, s)); + auto prd_nan = + xt::isnan(xt::view(q_prd, s, l)); + auto sum_nan = + xt::eval(xt::sum(prd_nan, -1)); + + if (xt::amin(sum_nan) != xt::amax(sum_nan)) + { + throw std::runtime_error( + "predictions across members feature " + "non-equal lengths" + ); + } + + auto msk_nan = + xt::where(obs_nan || xt::row(prd_nan, 0))[0]; + + xt::view(t_msk, s, l, xt::all(), xt::keep(msk_nan)) = + false; + } + } + }; + + // methods to compute metrics + xt::xtensor<double, 5> get_BS() + { + if (!BS.has_value()) + { + BS = metrics::calc_BS( + get_bs(), get_q_thr(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_msk, n_exp + ); + } + return BS.value(); + }; + + 
xt::xtensor<double, 7> get_REL_DIAG() + { + if (!REL_DIAG.has_value()) + { + REL_DIAG = metrics::calc_REL_DIAG( + get_q_thr(), get_o_k(), get_y_k(), + t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + return REL_DIAG.value(); + }; + + xt::xtensor<double, 6> get_BS_CRD() + { + if (!BS_CRD.has_value()) + { + BS_CRD = metrics::calc_BS_CRD( + get_q_thr(), get_bar_o(), get_REL_DIAG(), + get_t_counts(), + n_sit, n_ldt, n_thr, n_msk, n_exp + ); + } + return BS_CRD.value(); + }; + + xt::xtensor<double, 6> get_BS_LBD() + { + if (!BS_LBD.has_value()) + { + BS_LBD = metrics::calc_BS_LBD( + get_q_thr(), get_o_k(), get_y_k(), + t_msk, b_exp, get_t_counts(), + n_sit, n_ldt, n_thr, n_msk, n_exp + ); + } + return BS_LBD.value(); + }; + + xt::xtensor<double, 5> get_BSS() + { + if (!BSS.has_value()) + { + BSS = metrics::calc_BSS( + get_bs(), get_q_thr(), get_o_k(), get_bar_o(), t_msk, + b_exp, n_sit, n_ldt, n_thr, n_msk, n_exp + ); + } + return BSS.value(); + }; + + xt::xtensor<double, 4> get_CRPS_FROM_BS() + { + if (!CRPS_FROM_BS.has_value()) + { + CRPS_FROM_BS = metrics::calc_CRPS_FROM_BS( + q_obs, q_prd, is_high_flow_event(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return CRPS_FROM_BS.value(); + }; + + xt::xtensor<double, 4> get_CRPS_FROM_ECDF() + { + if (!CRPS_FROM_ECDF.has_value()) + { + CRPS_FROM_ECDF = metrics::calc_CRPS_FROM_ECDF( + get_crps_from_ecdf(), t_msk, b_exp, + n_sit, n_ldt, n_msk, n_exp + ); + } + return CRPS_FROM_ECDF.value(); + }; + + xt::xtensor<double, 5> get_QS() + { + if (!QS.has_value()) + { + QS = metrics::calc_QS( + get_qs(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return QS.value(); + }; + + xt::xtensor<double, 4> get_CRPS_FROM_QS() + { + if (!CRPS_FROM_QS.has_value()) + { + CRPS_FROM_QS = metrics::calc_CRPS_FROM_QS( + get_crps_from_qs(), t_msk, b_exp, + n_sit, n_ldt, n_msk, n_exp + ); + } + return CRPS_FROM_QS.value(); + }; + + xt::xtensor<double, 6> get_POD() + { + if (!POD.has_value()) + { + 
POD = metrics::calc_POD( + get_pod(), get_q_thr(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + return POD.value(); + }; + + xt::xtensor<double, 6> get_POFD() + { + if (!POFD.has_value()) + { + POFD = metrics::calc_POFD( + get_pofd(), get_q_thr(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + return POFD.value(); + }; + + xt::xtensor<double, 6> get_FAR() + { + if (!FAR.has_value()) + { + FAR = metrics::calc_FAR( + get_far(), get_q_thr(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + return FAR.value(); + }; + + xt::xtensor<double, 6> get_CSI() + { + if (!CSI.has_value()) + { + CSI = metrics::calc_CSI( + get_csi(), get_q_thr(), t_msk, b_exp, + n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp + ); + } + return CSI.value(); + }; + + xt::xtensor<double, 5> get_ROCSS() + { + if (!ROCSS.has_value()) + { + ROCSS = metrics::calc_ROCSS( + get_POD(), get_POFD(), get_q_thr() + ); + } + return ROCSS.value(); + }; + + xt::xtensor<double, 5> get_RANK_HIST() + { + if (!RANK_HIST.has_value()) + { + RANK_HIST = metrics::calc_RANK_HIST( + get_o_j(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return RANK_HIST.value(); + }; + + xt::xtensor<double, 4> get_DS() + { + if (!DS.has_value()) + { + DS = metrics::calc_DS( + get_o_j(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return DS.value(); + }; + + xt::xtensor<double, 4> get_AS() + { + if (!AS.has_value()) + { + AS = metrics::calc_AS( + get_r_k(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return AS.value(); + }; + + xt::xtensor<double, 5> get_CR() + { + if (!CR.has_value()) + { + CR = metrics::calc_CR( + get_obs_in_itv(), t_msk, b_exp, + n_sit, n_ldt, n_itv, n_msk, n_exp + ); + } + return CR.value(); + }; + + xt::xtensor<double, 5> get_AW() + { + if (!AW.has_value()) + { + AW = metrics::calc_AW( + get_itv_width(), t_msk, b_exp, + n_sit, n_ldt, n_itv, n_msk, n_exp + ); + } + return AW.value(); + }; + + xt::xtensor<double, 5> 
get_AWN() + { + if (!AWN.has_value()) + { + AWN = metrics::calc_AWN( + q_obs, get_AW(), t_msk, b_exp, + n_sit, n_ldt, n_msk, n_exp + ); + } + return AWN.value(); + }; + + xt::xtensor<double, 5> get_AWI() + { + if (!AWI.has_value()) + { + AWI = metrics::calc_AWI( + get_AW(), get_clim_bnds() + ); + } + return AWI.value(); + }; + + xt::xtensor<double, 5> get_WS() + { + if (!WS.has_value()) + { + WS = metrics::calc_WS( + get_ws(), t_msk, b_exp, + n_sit, n_ldt, n_itv, n_msk, n_exp + ); + } + return WS.value(); + }; + + xt::xtensor<double, 5> get_WSS() + { + if (!WSS.has_value()) + { + WSS = metrics::calc_WSS( + q_obs, get_c_lvl(), get_clim_bnds(), get_WS(), + t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp + ); + } + return WSS.value(); + }; + + xt::xtensor<double, 4> get_ES() + { + if (!ES.has_value()) + { + ES = metrics::calc_ES( + get_es(), t_msk, b_exp, n_ldt, n_msk, n_exp + ); + } + return ES.value(); + }; + + // methods to compute diagnostics + xt::xtensor<double, 4> get_completeness() + { + return get_t_counts(); + }; + }; + } +} + +#endif //EVALHYD_PROBABILIST_EVALUATOR_HPP diff --git a/include/evalhyd/detail/probabilist/intervals.hpp b/include/evalhyd/detail/probabilist/intervals.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c36a235838ead80eec395b4ccd342684217b5afc --- /dev/null +++ b/include/evalhyd/detail/probabilist/intervals.hpp @@ -0,0 +1,650 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
#ifndef EVALHYD_PROBABILIST_INTERVALS_HPP
#define EVALHYD_PROBABILIST_INTERVALS_HPP

#include <limits>

#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xindex_view.hpp>
#include <xtensor/xsort.hpp>


namespace evalhyd
{
    namespace probabilist
    {
        namespace elements
        {
            /// Compute the bounds of the predictive intervals by computing
            /// the quantiles of the predictive distribution corresponding
            /// to the confidence intervals.
            ///
            /// \param q_prd
            ///     Streamflow predictions.
            ///     shape: (sites, lead times, members, time)
            /// \param c_lvl
            ///     Confidence levels for the predictive intervals.
            ///     shape: (intervals,)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_tim
            ///     Number of time steps.
            /// \return
            ///     Bounds of the predictive intervals.
            ///     shape: (sites, lead times, intervals, bounds, time)
            template <class XD4>
            inline xt::xtensor<double, 5> calc_itv_bnds(
                    const XD4& q_prd,
                    const xt::xtensor<double, 1>& c_lvl,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_tim
            )
            {
                xt::xtensor<double, 5> itv_bnds =
                        xt::zeros<double>({n_sit, n_ldt, n_itv, std::size_t {2}, n_tim});

                // determine quantiles forming the predictive intervals
                // from the confidence levels
                // (e.g. a 80% confidence level yields the 0.1 and 0.9 quantiles)
                xt::xtensor<double, 2> quantiles =
                        xt::zeros<double>({n_itv, std::size_t {2}});
                xt::col(quantiles, 0) = 0.5 - c_lvl / 200.;
                xt::col(quantiles, 1) = 0.5 + c_lvl / 200.;

                // compute predictive interval bounds from quantiles
                // (quantiles taken along axis 2, i.e. across ensemble members)
                for (std::size_t i = 0; i < n_itv; i++)
                {
                    auto q = xt::quantile(q_prd, xt::view(quantiles, i), 2);

                    xt::view(itv_bnds, xt::all(), xt::all(), i, 0, xt::all()) =
                            xt::view(q, 0);
                    xt::view(itv_bnds, xt::all(), xt::all(), i, 1, xt::all()) =
                            xt::view(q, 1);
                }

                return itv_bnds;
            }

            /// Determine whether the observations are inside the predictive
            /// intervals for each time step.
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param itv_bnds
            ///     Bounds of the predictive intervals.
            ///     shape: (sites, lead times, intervals, bounds, time)
            /// \return
            ///     Boolean-like tensor evaluating to true where observations
            ///     are inside the predictive intervals.
            ///     shape: (sites, lead times, intervals, time)
            template <class XD2>
            inline xt::xtensor<double, 4> calc_obs_in_itv(
                    const XD2& q_obs,
                    const xt::xtensor<double, 5>& itv_bnds
            )
            {
                // notations below follow Gneiting and Raftery (2007), sect 6.2
                // https://doi.org/10.1198/016214506000001437

                auto x = xt::view(q_obs, xt::all(), xt::newaxis(), xt::newaxis(), xt::all());
                auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all());
                auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all());

                // bounds are inclusive: an observation equal to a bound counts as inside
                return ((x >= l) && (x <= u));
            }

            /// Compute the width of the predictive intervals for each time step.
            ///
            /// \param itv_bnds
            ///     Bounds of the predictive intervals.
            ///     shape: (sites, lead times, intervals, bounds, time)
            /// \return
            ///     Widths of the predictive intervals for each time step.
            ///     shape: (sites, lead times, intervals, time)
            inline xt::xtensor<double, 4> calc_itv_width(
                    const xt::xtensor<double, 5>& itv_bnds
            )
            {
                // notations below follow Gneiting and Raftery (2007), sect 6.2
                // https://doi.org/10.1198/016214506000001437

                auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all());
                auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all());

                return (u - l);
            }

            /// Compute the bounds of the climatology corresponding to the
            /// confidence levels.
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param c_lvl
            ///     Confidence levels for the predictive intervals.
            ///     shape: (intervals,)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Climatology bounds.
            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
            template <class XD2>
            inline xt::xtensor<double, 6> calc_clim_bnds(
                    const XD2& q_obs,
                    const xt::xtensor<double, 1>& c_lvl,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // initialise output variable for the climatology bounds
                xt::xtensor<double, 6> clim_bnds =
                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp,
                                           n_itv, std::size_t {2}});

                // determine quantiles forming the predictive intervals
                // from the confidence levels
                xt::xtensor<double, 2> quantiles =
                        xt::zeros<double>({n_itv, std::size_t {2}});
                xt::col(quantiles, 0) = 0.5 - c_lvl / 200.;
                xt::col(quantiles, 1) = 0.5 + c_lvl / 200.;

                // compute variable one mask at a time to minimise memory imprint
                for (std::size_t m = 0; m < n_msk; m++)
                {
                    // apply the mask
                    // (using NaN workaround until reducers work on masked_view)
                    auto q_obs_masked = xt::where(
                            xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
                            xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
                            NAN
                    );

                    // compute variable one sample at a time
                    for (std::size_t e = 0; e < n_exp; e++)
                    {
                        // apply the bootstrap sampling
                        auto q_obs_masked_sampled =
                                xt::view(q_obs_masked, xt::all(), xt::all(), b_exp[e]);

                        // compute "climatology" interval
                        for (std::size_t s = 0; s < n_sit; s++)
                        {
                            for (std::size_t l = 0; l < n_ldt; l++)
                            {
                                for (std::size_t i = 0; i < n_itv; i++)
                                {
                                    auto obs =
                                            xt::view(q_obs_masked_sampled,
                                                     s, l, xt::all());
                                    // discard the NaN-masked time steps before
                                    // taking quantiles
                                    auto obs_filtered =
                                            xt::filter(obs, !xt::isnan(obs));

                                    if (obs_filtered.size() > 0)
                                    {
                                        // lower bound
                                        xt::view(clim_bnds, s, l, m, e, i, 0) =
                                                xt::quantile(
                                                        obs_filtered,
                                                        {quantiles(i, 0)}
                                                )();
                                        // upper bound
                                        xt::view(clim_bnds, s, l, m, e, i, 1) =
                                                xt::quantile(
                                                        obs_filtered,
                                                        {quantiles(i, 1)}
                                                )();
                                    }
                                    else
                                    {
                                        // no observation left in the subset:
                                        // bounds are undefined
                                        xt::view(clim_bnds, s, l, m, e, i, xt::all()) =
                                                NAN;
                                    }

                                }
                            }
                        }
                    }
                }

                return clim_bnds;
            }
        }

        namespace intermediate
        {
            /// Compute the Winkler score for each time step.
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param c_lvl
            ///     Confidence levels for the predictive intervals.
            ///     shape: (intervals,)
            /// \param itv_bnds
            ///     Bounds of the predictive intervals.
            ///     shape: (sites, lead times, intervals, bounds, time)
            /// \return
            ///     Winkler (interval) scores for each time step.
            ///     shape: (sites, lead times, intervals, time)
            template <class XD2>
            inline xt::xtensor<double, 4> calc_ws(
                    const XD2& q_obs,
                    const xt::xtensor<double, 1>& c_lvl,
                    const xt::xtensor<double, 5>& itv_bnds
            )
            {
                // notations below follow Gneiting and Raftery (2007), sect 6.2
                // https://doi.org/10.1198/016214506000001437

                auto x = xt::view(q_obs, xt::all(), xt::newaxis(), xt::newaxis(), xt::all());
                auto alpha = 1 - xt::view(c_lvl, xt::all(), xt::newaxis()) / 100.;

                // compute component corresponding to observations below interval
                auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all());
                // (l - x)𝟙{x < l}
                auto ws_l = xt::where(x < l, l - x, 0.);

                // compute component corresponding to observations above interval
                auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all());
                // (x - u)𝟙{x > u}
                auto ws_u = xt::where(x > u, x - u, 0.);

                // compute interval score
                auto ws = (u - l) + 2. * (ws_l + ws_u) / alpha;

                return ws;
            }
        }

        namespace metrics
        {
            namespace detail {
                /// Reduce a per-time-step metric along the temporal axis
                /// (mean over time) for each temporal subset and each
                /// bootstrap sample.
                inline xt::xtensor<double, 5> calc_METRIC_from_metric(
                        const xt::xtensor<double, 4>& metric,
                        const xt::xtensor<bool, 4>& t_msk,
                        const std::vector<xt::xkeep_slice<int>>& b_exp,
                        std::size_t n_sit,
                        std::size_t n_ldt,
                        std::size_t n_itv,
                        std::size_t n_msk,
                        std::size_t n_exp
                )
                {
                    // initialise output variable
                    xt::xtensor<double, 5> METRIC =
                            xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_itv});

                    // compute variable one mask at a time to minimise memory imprint
                    for (std::size_t m = 0; m < n_msk; m++)
                    {
                        // apply the mask
                        // (using NaN workaround until reducers work on masked_view)
                        auto metric_masked = xt::where(
                                xt::view(t_msk, xt::all(), xt::all(), m,
                                         xt::newaxis(), xt::all()),
                                metric,
                                NAN
                        );

                        // compute variable one sample at a time
                        for (std::size_t e = 0; e < n_exp; e++)
                        {
                            // apply the bootstrap sampling
                            auto metric_masked_sampled =
                                    xt::view(metric_masked, xt::all(),
                                             xt::all(), xt::all(), b_exp[e]);

                            // calculate the mean over the time steps
                            xt::view(METRIC, xt::all(), xt::all(), m, e, xt::all()) =
                                    xt::nanmean(metric_masked_sampled, -1);
                        }
                    }

                    return METRIC;
                }
            }

            /// Compute the Coverage Ratio (CR), i.e. the portion of
            /// observations falling within the predictive intervals.
            /// It is a measure of the reliability of the predictions.
            ///
            /// \param obs_in_itv
            ///     Boolean-like tensor evaluating to true where observations
            ///     are inside the predictive intervals.
            ///     shape: (sites, lead times, intervals, time)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Coverage ratios.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            inline xt::xtensor<double, 5> calc_CR(
                    const xt::xtensor<double, 4>& obs_in_itv,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // CR is the temporal mean of the boolean-like indicator
                return detail::calc_METRIC_from_metric(
                        obs_in_itv, t_msk, b_exp,
                        n_sit, n_ldt, n_itv, n_msk, n_exp
                );
            }

            /// Compute the Average Width (AW) of the predictive intervals.
            /// It is a measure of the sharpness of the predictions.
            ///
            /// \param itv_width
            ///     Widths of predictive intervals for each time step.
            ///     shape: (sites, lead times, intervals, time)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Average widths.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            inline xt::xtensor<double, 5> calc_AW(
                    const xt::xtensor<double, 4>& itv_width,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // AW is the temporal mean of the per-time-step widths
                return detail::calc_METRIC_from_metric(
                        itv_width, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp
                );
            }

            /// Compute the Average Width Normalised (AWN).
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param AW
            ///     Average widths.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Average widths normalised with mean observations.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            template <class XD2>
            inline xt::xtensor<double, 5> calc_AWN(
                    const XD2& q_obs,
                    const xt::xtensor<double, 5>& AW,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // initialise variable for the mean observation used as
                // the normalisation term
                xt::xtensor<double, 5> mean_obs =
                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp,
                                           std::size_t {1}});

                // compute variable one mask at a time to minimise memory imprint
                for (std::size_t m = 0; m < n_msk; m++) {
                    // apply the mask
                    // (using NaN workaround until reducers work on masked_view)
                    auto q_obs_masked = xt::where(
                            xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
                            xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
                            NAN
                    );

                    // compute variable one sample at a time
                    for (std::size_t e = 0; e < n_exp; e++)
                    {
                        // apply the bootstrap sampling
                        auto q_obs_masked_sampled =
                                xt::view(q_obs_masked, xt::all(), xt::all(), b_exp[e]);

                        // compute mean observation
                        xt::view(mean_obs, xt::all(), xt::all(), m, e, 0) =
                                xt::nanmean(q_obs_masked_sampled, -1);
                    }
                }

                // -inf flags cases where the mean observation is not
                // strictly positive, i.e. normalisation is undefined
                return xt::where(mean_obs > 0,
                                 AW / mean_obs,
                                 - std::numeric_limits<double>::infinity());
            }

            /// Compute the Average Width Index (AWI).
            ///
            /// \param AW
            ///     Average widths.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            /// \param clim_bnds
            ///     Climatology bounds.
            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
            /// \return
            ///     Average width indices.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            inline xt::xtensor<double, 5> calc_AWI(
                    const xt::xtensor<double, 5>& AW,
                    const xt::xtensor<double, 6>& clim_bnds
            )
            {
                // compute "climatology" average width
                // (upper bound minus lower bound)
                auto AW_clim =
                        xt::view(clim_bnds, xt::all(), xt::all(), xt::all(),
                                 xt::all(), xt::all(), 1)
                        - xt::view(clim_bnds, xt::all(), xt::all(), xt::all(),
                                   xt::all(), xt::all(), 0);

                // -inf flags cases where the climatology width is not
                // strictly positive, i.e. the index is undefined
                return xt::where(AW_clim > 0,
                                 1 - (AW / AW_clim),
                                 - std::numeric_limits<double>::infinity());
            }

            /// Compute the Winkler scores (WS), also known as interval score.
            ///
            /// \param ws
            ///     Winkler scores for each time step.
            ///     shape: (sites, lead times, intervals, time)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Winkler scores.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            inline xt::xtensor<double, 5> calc_WS(
                    const xt::xtensor<double, 4>& ws,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // WS is the temporal mean of the per-time-step Winkler scores
                return detail::calc_METRIC_from_metric(
                        ws, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp
                );
            }

            /// Compute the Winkler skill scores (WSS).
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param c_lvl
            ///     Confidence levels for the predictive intervals.
            ///     shape: (intervals,)
            /// \param clim_bnds
            ///     Climatology bounds.
            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
            /// \param WS
            ///     Winkler scores.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            /// \param t_msk
            ///     Temporal subsets of the whole record.
            ///     shape: (sites, lead times, subsets, time)
            /// \param b_exp
            ///     Bootstrap samples.
            ///     shape: (samples, time slice)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_msk
            ///     Number of temporal subsets.
            /// \param n_exp
            ///     Number of bootstrap samples.
            /// \return
            ///     Winkler skill scores.
            ///     shape: (sites, lead times, subsets, samples, intervals)
            template <class XD2>
            inline xt::xtensor<double, 5> calc_WSS(
                    const XD2& q_obs,
                    const xt::xtensor<double, 1>& c_lvl,
                    const xt::xtensor<double, 6>& clim_bnds,
                    const xt::xtensor<double, 5>& WS,
                    const xt::xtensor<bool, 4>& t_msk,
                    const std::vector<xt::xkeep_slice<int>>& b_exp,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_msk,
                    std::size_t n_exp
            )
            {
                // compute "climatology" Winkler score used as the
                // reference score of the skill score
                xt::xtensor<double, 5> WS_clim =
                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_itv});

                for (std::size_t l = 0; l < n_ldt; l++)
                {
                    for (std::size_t m = 0; m < n_msk; m++)
                    {
                        for (std::size_t e = 0; e < n_exp; e++)
                        {
                            // score the climatology bounds as if they were
                            // the predictive interval bounds
                            auto ws_clim = intermediate::calc_ws(
                                    q_obs, c_lvl,
                                    xt::view(clim_bnds, xt::all(), l, xt::newaxis(),
                                             m, e, xt::all(), xt::all(),
                                             xt::newaxis())
                            );

                            // apply the mask
                            // (using NaN workaround until reducers work on masked_view)
                            auto ws_clim_masked = xt::where(
                                    xt::view(t_msk, xt::all(), l, m, xt::newaxis(), xt::all()),
                                    xt::view(ws_clim, xt::all(), l, xt::all(), xt::all()),
                                    NAN
                            );

                            // apply the bootstrap sampling
                            auto ws_clim_masked_sampled =
                                    xt::view(ws_clim_masked, xt::all(), xt::all(), b_exp[e]);

                            xt::view(WS_clim, xt::all(), l, m, e, xt::all()) =
                                    xt::nanmean(ws_clim_masked_sampled, -1);
                        }
                    }
                }

                // compute the Winkler skill score
                // (-inf flags cases where the reference score is not
                // strictly positive, i.e. the skill score is undefined)
                return xt::where(WS_clim > 0,
                                 1 - (WS / WS_clim),
                                 - std::numeric_limits<double>::infinity());
            }
        }
    }
}

#endif //EVALHYD_PROBABILIST_INTERVALS_HPP
diff --git a/include/evalhyd/detail/probabilist/multivariate.hpp b/include/evalhyd/detail/probabilist/multivariate.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d0ca887c53041e2e25ab71db60260b20905bebda
--- /dev/null
+++ b/include/evalhyd/detail/probabilist/multivariate.hpp
@@ -0,0 +1,173 @@
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.

#ifndef EVALHYD_PROBABILIST_MULTIVARIATE_HPP
#define EVALHYD_PROBABILIST_MULTIVARIATE_HPP

#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xmath.hpp>


namespace evalhyd
{
    namespace probabilist
    {
        namespace intermediate
        {
            /// Compute the energy score for each time step computed using its
            /// formulation based on expectancies where the ensemble is used as
            /// the random variable.
            ///
            /// \param q_obs
            ///     Streamflow observations.
            ///     shape: (sites, time)
            /// \param q_prd
            ///     Streamflow predictions.
            ///     shape: (sites, lead times, members, time)
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_mbr
            ///     Number of ensemble members.
            /// \param n_tim
            ///     Number of time steps.
            /// \return
            ///     ES for each time step.
            ///     shape: (lead times, time)
            template <class XD2, class XD4>
            inline xt::xtensor<double, 2> calc_es(
                    const XD2& q_obs,
                    const XD4& q_prd,
                    std::size_t n_ldt,
                    std::size_t n_mbr,
                    std::size_t n_tim
            )
            {
                // notations below follow Gneiting et al. (2008)
(2008) + // https://doi.org/10.1007/s11749-008-0114-x + + // initialise internal variable + xt::xtensor<double, 2> es_xj_x = + xt::zeros<double>({n_ldt, n_tim}); + xt::xtensor<double, 2> es_xi_xj = + xt::zeros<double>({n_ldt, n_tim}); + + for (std::size_t j = 0; j < n_mbr; j++) + { + // $\sum_{j=1}^{m} || x_j - x ||$ + es_xj_x += xt::sqrt( + xt::sum( + xt::square( + // x_j is the jth member of q_prd + xt::view(q_prd, xt::all(), xt::all(), + j, xt::all()) + // x is q_obs + - xt::view(q_obs, xt::all(), + xt::newaxis(), xt::all()) + ), + 0 + ) + ); + + for (std::size_t i = 0; i < n_mbr; i++) + { + // $\sum_{i=1}^{m} \sum_{j=1}^{m} || x_i - x_j ||$ + es_xi_xj += xt::sqrt( + xt::sum( + xt::square( + // x_i is the ith member of q_prd + xt::view(q_prd, xt::all(), + xt::all(), i, xt::all()) + // x_j is the jth member of q_prd + - xt::view(q_prd, xt::all(), + xt::all(), j, xt::all()) + ), + 0 + ) + ); + } + } + + auto es = ( + (1. / n_mbr * es_xj_x) + - (1. / (2 * n_mbr * n_mbr) * es_xi_xj) + ); + + return es; + } + } + + namespace metrics + { + /// Compute the energy score (ES), a multi-site generalisation + /// of the continuous rank probability score. + /// + /// \param es + /// ES for each time step. + /// shape: (lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_ldt + /// Number of lead times. + /// \param n_tim + /// Number of time steps. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// ES. 
+ /// shape: (lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_ES( + const xt::xtensor<double, 2>& es, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> ES = + xt::zeros<double>({std::size_t {1}, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // determine the multi-site mask (i.e. only retain time + // steps where no site is masked) + auto msk = xt::prod( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + 0 + ); + + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto es_masked = xt::where(msk, es, NAN); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto es_masked_sampled = xt::view( + es_masked, xt::all(), b_exp[e] + ); + + // calculate the mean over the time steps + xt::view(ES, 0, xt::all(), m, e) = + xt::nanmean(es_masked_sampled, -1); + } + } + + return ES; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_MULTIVARIATE_HPP diff --git a/include/evalhyd/detail/probabilist/quantiles.hpp b/include/evalhyd/detail/probabilist/quantiles.hpp new file mode 100644 index 0000000000000000000000000000000000000000..88bd5284b5c3983a2b562ce8e88e37bd92d8b95a --- /dev/null +++ b/include/evalhyd/detail/probabilist/quantiles.hpp @@ -0,0 +1,255 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_PROBABILIST_QUANTILES_HPP +#define EVALHYD_PROBABILIST_QUANTILES_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xmath.hpp> + + +// NOTE ------------------------------------------------------------------------ +// All equations in metrics below are following notations from +// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences. +// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225". +// In particular, pp. 302-303, 332-333. +// ----------------------------------------------------------------------------- + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Compute the forecast quantiles from the ensemble members. + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (sites, lead times, members, time) + /// \return + /// Streamflow forecast quantiles. + /// shape: (sites, lead times, quantiles, time) + template <class XD4> + inline xt::xtensor<double, 4> calc_q_qnt( + const XD4& q_prd + ) + { + return xt::sort(q_prd, 2); + } + } + + namespace intermediate + { + /// Compute the quantile scores for each time step. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_qnt + /// Streamflow quantiles. + /// shape: (sites, lead times, quantiles, time) + /// \param n_mbr + /// Number of ensemble members. + /// \return + /// Quantile scores for each time step. 
+ /// shape: (sites, lead times, quantiles, time) + template <class XD2> + inline xt::xtensor<double, 4> calc_qs( + const XD2 &q_obs, + const xt::xtensor<double, 4>& q_qnt, + std::size_t n_mbr + ) + { + // compute the quantile order $alpha$ + xt::xtensor<double, 1> alpha = + xt::arange<double>(1., double(n_mbr + 1)) + / double(n_mbr + 1); + + // calculate the difference + xt::xtensor<double, 4> diff = + q_qnt - xt::view(q_obs, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all()); + + // calculate the quantile scores + xt::xtensor<double, 4> qs = xt::where( + diff > 0, + 2 * (1 - xt::view(alpha, xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis())) * diff, + - 2 * xt::view(alpha, xt::newaxis(), xt::newaxis(), + xt::all(), xt::newaxis()) * diff + ); + + return qs; + } + + /// Compute the continuous rank probability score(s) based + /// on quantile scores for each time step, and integrating using the + /// trapezoidal rule. + /// + /// /!\ The number of quantiles must be sufficiently large so that the + /// cumulative distribution is smooth enough for the numerical + /// integration to be accurate. + /// + /// \param qs + /// Quantile scores for each time step. + /// shape: (sites, lead times, quantiles, time) + /// \param n_mbr + /// Number of ensemble members. + /// \return + /// CRPS for each time step. + /// shape: (sites, lead times, time) + inline xt::xtensor<double, 3> calc_crps_from_qs( + const xt::xtensor<double, 4>& qs, + std::size_t n_mbr + ) + { + // integrate with trapezoidal rule + // xt::trapz(y, dx=1/(n+1), axis=2) + return xt::trapz(qs, 1./(double(n_mbr) + 1.), 2); + } + } + + namespace metrics + { + /// Compute the quantile score (QS). + /// + /// \param qs + /// Quantile scores for each time step. + /// shape: (sites, lead times, quantiles, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. 
+ /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Quantile scores. + /// shape: (sites, lead times, subsets, samples, quantiles) + inline xt::xtensor<double, 5> calc_QS( + const xt::xtensor<double, 4>& qs, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> QS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto qs_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, + xt::newaxis(), xt::all()), + qs, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto qs_masked_sampled = + xt::view(qs_masked, xt::all(), xt::all(), + xt::all(), b_exp[e]); + + // calculate the mean over the time steps + // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$ + xt::view(QS, xt::all(), xt::all(), m, e, xt::all()) = + xt::nanmean(qs_masked_sampled, -1); + } + } + + return QS; + } + + /// Compute the continuous rank probability score based on the + /// integration over the quantile scores (CRPS_FROM_QS). + /// + /// \param crps_from_qs + /// CRPS for each time step. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. 
+ /// \param n_ldt + /// Number of lead times. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// CRPS. + /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_CRPS_FROM_QS( + const xt::xtensor<double, 3>& crps_from_qs, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> CRPS_FROM_QS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto crps_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + crps_from_qs, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto crps_masked_sampled = + xt::view(crps_masked, xt::all(), xt::all(), + b_exp[e]); + + // calculate the mean over the time steps + // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$ + xt::view(CRPS_FROM_QS, xt::all(), xt::all(), m, e) = + xt::squeeze(xt::nanmean(crps_masked_sampled, -1)); + } + } + + return CRPS_FROM_QS; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_QUANTILES_HPP diff --git a/include/evalhyd/detail/probabilist/ranks.hpp b/include/evalhyd/detail/probabilist/ranks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d28675c8b16ce0f1a34aa807650c6cccf131980b --- /dev/null +++ b/include/evalhyd/detail/probabilist/ranks.hpp @@ -0,0 +1,452 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ +#ifndef EVALHYD_PROBABILIST_RANKS_HPP +#define EVALHYD_PROBABILIST_RANKS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xindex_view.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xrandom.hpp> + + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Compute the position of the observations amongst the ensemble + /// member predictions (i.e. their ranks). + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_qnt + /// Streamflow quantiles. + /// shape: (sites, lead times, quantiles, time) + /// \param n_mbr + /// Number of ensemble members. + /// \param seed + /// Seed to be used by random generator. + /// \return + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + template <class XD2, class XD4> + inline xt::xtensor<double, 3> calc_r_k( + const XD2& q_obs, + const XD4& q_qnt, + std::size_t n_mbr, + long int seed + ) + { + xt::xtensor<double, 3> ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(ranks, xt::all()) = NAN; + + xt::xtensor<double, 3> min_ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(min_ranks, xt::all()) = NAN; + + xt::xtensor<double, 3> max_ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(max_ranks, xt::all()) = NAN; + + for (std::size_t m = 0; m < n_mbr; m++) + { + // strictly below a member and no rank yet + xt::view(ranks, xt::all()) = xt::where( + (xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + < xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + xt::isnan(ranks), + m, + ranks + ); + + // first time tied with a member + xt::view(min_ranks, xt::all()) = xt::where( + xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), + xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + xt::isnan(min_ranks), + m, + min_ranks + ); + + // again tied 
with a member + xt::view(max_ranks, xt::all()) = xt::where( + xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), + xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + !xt::isnan(min_ranks), + m + 1, + max_ranks + ); + } + + // above last member + xt::view(ranks, xt::all()) = xt::where( + xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + > xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all()), + n_mbr, + ranks + ); + + // for ties, take random rank between min and max + xt::random::seed(seed); + xt::view(ranks, xt::all()) = xt::where( + !xt::isnan(min_ranks), + min_ranks + + xt::round((max_ranks - max_ranks + 1) + * xt::random::rand<double>(ranks.shape())), + ranks + ); + + return ranks; + } + + /// Compute the number of observations in each interval of the + /// rank diagram. + /// + /// \param r_k + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Tallies of streamflow observations in each rank interval. 
+ /// shape: (sites, lead times, subsets, samples, ranks) + inline xt::xtensor<double, 5> calc_o_j( + const xt::xtensor<double, 3>& r_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> o_j = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr + 1}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto r_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + r_k, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto r_k_masked_sampled = + xt::view(r_k_masked, xt::all(), xt::all(), + b_exp[e]); + + for (std::size_t j = 0; j < n_mbr + 1; j++) + { + // compute the observed relative frequency + // $o_j = \sum_{k \in M_j} r_k$ + xt::view(o_j, xt::all(), xt::all(), m, e, j) = + xt::sum( + xt::equal(r_k_masked_sampled, j), + -1 + ); + } + } + } + + return o_j; + } + } + + namespace metrics + { + /// Compute the frequencies of the rank histogram, also known as + /// Talagrand diagram. + /// + /// \param o_j + /// Tallies of streamflow observations for all possible ranks. + /// shape: (sites, lead times, subsets, samples, ranks) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. 
+ /// \return + /// Frequencies of the rank histogram. + /// shape: (sites, lead times, subsets, samples, ranks) + inline xt::xtensor<double, 5> calc_RANK_HIST( + const xt::xtensor<double, 5>& o_j, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> REL_DIAG = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr + 1}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), + m, b_exp[e]); + + // calculate length of subset + auto l = xt::eval( + xt::sum(t_msk_sampled, -1, xt::keep_dims) + ); + + // compute the rank diagram + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all()) = + xt::view(o_j, xt::all(), xt::all(), + m, e, xt::all()) + / l + ; + } + } + + return REL_DIAG; + } + + /// Compute the Delta score. + /// + /// \param o_j + /// Tallies of streamflow observations for all possible ranks. + /// shape: (sites, lead times, subsets, samples, ranks) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Delta scores. 
+ /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_DS( + const xt::xtensor<double, 5>& o_j, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> DS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), + m, b_exp[e]); + + // calculate length of subset + auto l = xt::eval( + xt::sum(t_msk_sampled, -1, xt::keep_dims) + ); + + // compute the Delta score + // \Delta = \sum_{k=1}^{N+1} (r_k - \frac{M}{N+1})^2 + auto delta = xt::nansum( + xt::square( + xt::view(o_j, xt::all(), xt::all(), m, e, xt::all()) + - (l / (n_mbr + 1)) + ), + -1 + ); + + // \Delta_o = \frac{MN}{N+1} + auto delta_o = ( + xt::view(l, xt::all(), xt::all(), 0) + * n_mbr / (n_mbr + 1) + ); + + // \delta = $\frac{\Delta}{\Delta_o} + xt::view(DS, xt::all(), xt::all(), m, e) = + delta / delta_o; + } + } + + return DS; + } + + /// Compute the Alpha score. + /// + /// \param r_k + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Alpha scores. 
+ /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_AS( + const xt::xtensor<double, 3>& r_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> AS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute one site and one leadtime at a time because of + // potential NaN (of varying numbers across sites/lead times) + // in the ranks that are not put at the end with `xt::sort` + // (unlike `numpy.sort`) which prevent from an easy conversion + // from rank to probability + for (std::size_t s = 0; s < n_sit; s++) + { + for (std::size_t l = 0; l < n_ldt; l++) + { + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto r_k_masked = xt::where( + xt::view(t_msk, s, l, m, xt::all()), + xt::view(r_k, s, l, xt::all()), + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto r_k_masked_sampled = + xt::view(r_k_masked, b_exp[e]); + + // notations below follow Renard et al. 
(2010) + // https://doi.org/10.1029/2009WR008328 + + // compute observed p values + // $p_{x(i)}$ + auto p_x_i = xt::sort( + xt::eval( + // filter out NaNs + xt::filter( + r_k_masked_sampled, + !xt::isnan(r_k_masked_sampled) + ) + / n_mbr + ) + ); + + // calculate length of realisations + // $N_x$ + auto N_x = p_x_i.size(); + + // compute theoretical p values + // $p_{x(i)}^{(th)}$ + auto p_x_i_th = + xt::arange<double>(double(N_x)) / (N_x - 1); + + // compute area between the predictive curve and + // the 1:1 line in the Q-Q plot + // $\alpha'_x$ + auto alpha_prime_x = xt::nanmean( + xt::abs(p_x_i - p_x_i_th) + ); + + // compute the alpha score + // $\alpha_x = 1 - 2 \alpha'_x$ + xt::view(AS, s, l, m, e) = 1 - 2 * alpha_prime_x; + } + } + } + } + + return AS; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_RANKS_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/uncertainty.hpp b/include/evalhyd/detail/uncertainty.hpp new file mode 100644 index 0000000000000000000000000000000000000000..25b13af0957e7552f52fe648564f2abc67be0054 --- /dev/null +++ b/include/evalhyd/detail/uncertainty.hpp @@ -0,0 +1,301 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
#ifndef EVALHYD_UNCERTAINTY_HPP
#define EVALHYD_UNCERTAINTY_HPP

#include <string>
#include <vector>
#include <array>
#include <ctime>
#include <chrono>
#include <iomanip>
#include <stdexcept>
// NOTE(review): std::istringstream is used below but <sstream> is not
// included here — it is currently pulled in transitively; confirm and
// add the include explicitly.

#include <xtensor/xtensor.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xrandom.hpp>
#include <xtensor/xsort.hpp>


// time point with minute resolution, used to compare datetimes after
// parsing them from strings
typedef std::chrono::time_point<
        std::chrono::system_clock, std::chrono::minutes
> tp_minutes;

namespace evalhyd
{
    namespace uncertainty
    {
        /// Build the bootstrap experiment: draw `n_samples` samples of
        /// `len_sample` whole years (with replacement) from the period
        /// covered by *datetimes*, and return, for each sample, the
        /// xtensor keep-slice selecting the corresponding time steps.
        ///
        /// \param datetimes
        ///     Datetime strings ("%Y-%m-%d %H:%M:%S"), one per time step,
        ///     with a constant time interval and covering whole years.
        /// \param n_samples
        ///     Number of bootstrap samples to generate.
        /// \param len_sample
        ///     Number of years drawn in each sample.
        /// \param seed
        ///     Seed for the random generator.
        /// \return
        ///     One keep slice of time-step indices per sample.
        inline auto bootstrap(
                const std::vector<std::string>& datetimes,
                int n_samples, int len_sample, long int seed
        )
        {
            // convert string to time_point (via tm)
            std::vector<std::tm> v_tm;
            std::vector<tp_minutes> v_timepoints;

            for (auto const& str: datetimes)
            {
                // convert string to tm
                std::tm tm = {};
                std::istringstream ss(str);
                ss >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S");
                if (ss.fail())
                {
                    throw std::runtime_error("datetime string parsing failed");
                }
                tm.tm_year += 400;  // add 400y to avoid dates prior 1970
                                    // while preserving leap year pattern
                                    // (the Gregorian calendar repeats every
                                    // 400 years)
                v_tm.push_back(tm);

                // convert tm to time_point
                auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm));
                v_timepoints.push_back(
                        std::chrono::time_point_cast<std::chrono::minutes>(tp)
                );
            }

            // adapt vector into xtensor
            xt::xtensor<tp_minutes, 1> x_timepoints = xt::adapt(v_timepoints);

            // check constant time interval
            auto ti = x_timepoints[1] - x_timepoints[0];
            for (std::size_t t = 1; t < x_timepoints.size() - 1; t++)
            {
                if (x_timepoints[t + 1] - x_timepoints[t] != ti)
                {
                    throw std::runtime_error(
                            "time interval not constant across datetimes"
                    );
                }
            }

            // identify start and end years for period
            int start_yr = v_tm.front().tm_year + 1900;
            int end_yr = v_tm.back().tm_year + 1900;

            // assume start of year block as start of time series
            std::tm start_hy = v_tm.front();

            // label each time step with the year block it belongs to
            // (blocks run from the series' start date to its anniversary)
            xt::xtensor<int, 1> year_blocks = xt::zeros<int>({v_tm.size()});
            for (int y = start_yr; y < end_yr; y++)
            {
                // define window for year blocks
                start_hy.tm_year = y - 1900;
                auto start = std::chrono::system_clock::from_time_t(
                        std::mktime(&start_hy)
                );
                start_hy.tm_year += 1;
                auto end = std::chrono::system_clock::from_time_t(
                        std::mktime(&start_hy)
                );

                xt::xtensor<bool, 1> wdw =
                        (x_timepoints >= start) && (x_timepoints < end);

                // check that year is complete (without a rigorous leap year check)
                // NOTE(review): this counts time steps, so it assumes daily
                // data — confirm against callers
                int n_days = xt::sum(wdw)();
                if ((n_days != 365) && (n_days != 366))
                {
                    throw std::runtime_error(
                            "year starting in " + std::to_string(y)
                            + " is incomplete"
                    );
                }

                // determine corresponding year block for each time step
                year_blocks = xt::where(wdw, y, year_blocks);
            }

            // check that time series ends on the last day of a year block
            // (any trailing step left labelled 0 means an incomplete block)
            if (year_blocks(year_blocks.size() - 1) == 0)
            {
                throw std::runtime_error(
                        "final day of final year not equal to first day of "
                        "first year minus one time step"
                );
            }

            // generate bootstrapping experiment: a (samples, years) table
            // of years drawn with replacement in [start_yr, end_yr)
            xt::random::seed(seed);
            xt::xtensor<int, 2> experiment = xt::random::randint(
                    {n_samples, len_sample}, start_yr, end_yr
            );

            std::vector<xt::xkeep_slice<int>> samples;

            // turn each sampled sequence of years into a keep slice of
            // the corresponding time-step indices
            for (int s = 0; s < n_samples; s++)
            {
                // select bootstrapped years
                auto exp = xt::view(experiment, s);

                auto i0 = xt::flatten_indices(
                        xt::argwhere(xt::equal(year_blocks, exp(0)))
                );
                auto i1 = xt::flatten_indices(
                        xt::argwhere(xt::equal(year_blocks, exp(1)))
                );
                xt::xtensor<int, 1> idx = xt::concatenate(xt::xtuple(i0, i1), 0);

                for (std::size_t p = 2; p < exp.size(); p++)
                {
                    auto i = xt::flatten_indices(
                            xt::argwhere(xt::equal(year_blocks, exp(p)))
                    );
                    idx = xt::concatenate(xt::xtuple(idx, i), 0);
                }

                samples.push_back(xt::keep(idx));
            }

            return samples;
        }

        /// Summarise deterministic metric values across bootstrap samples.
        ///
        /// \param values
        ///     Metric values with the samples on axis 2.
        /// \param summary
        ///     0: raw samples, 1: mean and standard deviation,
        ///     2: seven quantiles (0.05 to 0.95).
        /// \return
        ///     Summarised values (samples axis replaced accordingly).
        inline auto summarise_d(const xt::xarray<double>& values, int summary)
        {
            // define axis along which samples are
            std::size_t axis = 2;

            // determine shape for output values
            std::vector<std::size_t> shp;
            std::size_t i = 0;
            for (auto a : values.shape())
            {
                if (i != axis)
                {
                    shp.push_back(a);
                }
                else
                {
                    if (summary == 1)
                    {
                        shp.push_back(2);
                    }
                    else if (summary == 2)
                    {
                        shp.push_back(7);
                    }
                }
                i++;
            }

            // summary 2: series of quantiles across samples
            if (summary == 2)
            {
                xt::xarray<double> v = xt::zeros<double>(shp);

                // compute quantiles
                auto quantiles = xt::quantile(
                        values,
                        {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95},
                        axis
                );

                // transfer quantiles into correct axis
                // (since xt::quantile puts the quantiles on the first axis)
                for (std::size_t q = 0; q < 7; q++)
                {
                    xt::view(v, xt::all(), xt::all(), q) =
                            xt::view(quantiles, q);
                }

                return v;
            }
            // summary 1: mean and standard deviation across samples
            else if (summary == 1)
            {
                xt::xarray<double> v = xt::zeros<double>(shp);

                // compute mean
                xt::view(v, xt::all(), xt::all(), 0) =
                        xt::mean(values, {2});
                // compute standard deviation
                xt::view(v, xt::all(), xt::all(), 1) =
                        xt::stddev(values, {2});

                return v;
            }
            // summary 0: raw (keep all samples)
            else
            {
                return values;
            }
        }

        /// Summarise probabilistic metric values across bootstrap samples.
        ///
        /// Same contract as `summarise_d`, but with the samples on axis 3.
        inline auto summarise_p(const xt::xarray<double>& values, int summary)
        {
            // define axis along which samples are
            std::size_t axis = 3;

            // determine shape for output values
            std::vector<std::size_t> shp;
            std::size_t i = 0;
            for (auto a : values.shape())
            {
                if (i != axis)
                {
                    shp.push_back(a);
                }
                else
                {
                    if (summary == 1)
                    {
                        shp.push_back(2);
                    }
                    else if (summary == 2)
                    {
                        shp.push_back(7);
                    }
                }
                i++;
            }

            // summary 2: series of quantiles across samples
            if (summary == 2)
            {
                xt::xarray<double> v = xt::zeros<double>(shp);

                // compute quantiles
                auto quantiles = xt::quantile(
                        values,
                        {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95},
                        axis
                );

                // transfer quantiles into correct axis
                // (since xt::quantile puts the quantiles on the first axis)
                for (std::size_t q = 0; q < 7; q++)
                {
                    xt::view(v, xt::all(), xt::all(), xt::all(), q) =
                            xt::view(quantiles, q);
                }

                return v;
            }
            // summary 1: mean and standard deviation across samples
            else if (summary == 1)
            {
                xt::xarray<double> v = xt::zeros<double>(shp);

                // compute mean
                xt::view(v, xt::all(), xt::all(), xt::all(), 0) =
                        xt::mean(values, {axis});
                // compute standard deviation
                xt::view(v, xt::all(), xt::all(), xt::all(), 1) =
                        xt::stddev(values, {axis});

                return v;
            }
            // summary 0: raw (keep all samples)
            else
            {
                return values;
            }
        }
    }
}

#endif //EVALHYD_UNCERTAINTY_HPP

// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.

#ifndef EVALHYD_UTILS_HPP
#define EVALHYD_UTILS_HPP

#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <stdexcept>

#include <xtl/xoptional.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xrandom.hpp>


namespace evalhyd
{
    namespace utils
    {
+ inline void check_metrics ( + const std::vector<std::string>& requested_metrics, + const std::vector<std::string>& valid_metrics + ) + { + for (const auto& metric : requested_metrics) + { + if (std::find(valid_metrics.begin(), valid_metrics.end(), metric) + == valid_metrics.end()) + { + throw std::runtime_error( + "invalid evaluation metric: " + metric + ); + } + } + } + + // Procedure to check that all elements in the list of diagnostics are + // valid diagnostics. + // + // \param requested_diags + // Vector of strings for the diagnostic(s) to be computed. + // \param valid_diags + // Vector of strings for the diagnostic(s) to can be computed. + inline void check_diags ( + const std::vector<std::string>& requested_diags, + const std::vector<std::string>& valid_diags + ) + { + for (const auto& diag : requested_diags) + { + if (std::find(valid_diags.begin(), valid_diags.end(), diag) + == valid_diags.end()) + { + throw std::runtime_error( + "invalid evaluation diagnostic: " + diag + ); + } + } + } + + // Procedure to check that all elements for a bootstrap experiment + // are provided and valid. + // + // \param bootstrap + // Map of parameters for the bootstrap experiment. 
+ inline void check_bootstrap ( + const std::unordered_map<std::string, int>& bootstrap + ) + { + // check n_samples + if (bootstrap.find("n_samples") == bootstrap.end()) + { + throw std::runtime_error( + "number of samples missing for bootstrap" + ); + } + auto n_samples = bootstrap.find("n_samples")->second; + if (n_samples < 1) + { + throw std::runtime_error( + "number of samples must be greater than zero" + ); + } + // check len_sample + if (bootstrap.find("len_sample") == bootstrap.end()) + { + throw std::runtime_error( + "length of sample missing for bootstrap" + ); + } + auto len_sample = bootstrap.find("len_sample")->second; + if (len_sample < 1) + { + throw std::runtime_error( + "length of sample must be greater than zero" + ); + } + // check summary + if (bootstrap.find("summary") == bootstrap.end()) + { + throw std::runtime_error( + "summary missing for bootstrap" + ); + } + auto summary = bootstrap.find("summary")->second; + if ((summary < 0) || (summary > 2)) + { + throw std::runtime_error( + "invalid value for bootstrap summary" + ); + } + } + + // Function to get a seed for random generators + // + // \param seed + // Optional value to use to set the seed for random generators. + // \return + // A seed value to use in random generators. + inline long int get_seed(xtl::xoptional<int, bool> seed) + { + if (seed.has_value()) + { + return seed.value(); + } + else + { + return std::time(nullptr); + } + } + } +} + +#endif //EVALHYD_UTILS_HPP diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 45ff6591af894998e42063bf2a6b534db476f237..19cbdbcecde37ea6f5e82ce593c273916d04bae4 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -1,12 +1,23 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ #ifndef EVALHYD_EVALD_HPP #define EVALHYD_EVALD_HPP #include <unordered_map> #include <vector> +#include <xtl/xoptional.hpp> +#include <xtensor/xexpression.hpp> #include <xtensor/xtensor.hpp> #include <xtensor/xarray.hpp> +#include "detail/utils.hpp" +#include "detail/masks.hpp" +#include "detail/uncertainty.hpp" +#include "detail/determinist/evaluator.hpp" + namespace evalhyd { @@ -14,16 +25,31 @@ namespace evalhyd /// /// \rst /// + /// :Template Parameters: + /// + /// XD2: Any 2-dimensional container class storing numeric elements + /// (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``, + /// ``xt::rtensor<double, 2>``, etc.). + /// + /// XB3: Any 3-dimensional container class storing boolean elements + /// (e.g. ``xt::xtensor<bool, 3>``, ``xt::pytensor<bool, 3>``, + /// ``xt::rtensor<bool, 3>``, etc.). + /// + /// XS2: Any 2-dimensional container class storing string elements + /// (e.g. ``xt::xtensor<std::array<char, 32>, 2>``, + /// ``xt::pytensor<std::array<char, 32>, 2>``, + /// ``xt::rtensor<std::array<char, 32>, 2>``, etc.). + /// /// :Parameters: /// - /// q_obs: ``xt::xtensor<double, 2>`` + /// q_obs: ``XD2`` /// Streamflow observations. Time steps with missing observations /// must be assigned `NAN` values. Those time steps will be ignored /// both in the observations and the predictions before the /// *metrics* are computed. /// shape: (1, time) /// - /// q_prd: ``xt::xtensor<double, 2>`` + /// q_prd: ``XD2`` /// Streamflow predictions. Time steps with missing predictions /// must be assigned `NAN` values. Those time steps will be ignored /// both in the observations and the predictions before the @@ -33,6 +59,22 @@ namespace evalhyd /// metrics: ``std::vector<std::string>`` /// The sequence of evaluation metrics to be computed. /// + /// .. seealso:: :doc:`../../metrics/deterministic` + /// + /// q_thr: ``XD2``, optional + /// Streamflow exceedance threshold(s). If provided, *events* must + /// also be provided. 
+ /// shape: (sites, thresholds) + /// + /// events: ``std::string``, optional + /// The type of streamflow events to consider for threshold + /// exceedance-based metrics. It can either be set as "high" when + /// flooding conditions/high flow events are evaluated (i.e. event + /// occurring when streamflow goes above threshold) or as "low" when + /// drought conditions/low flow events are evaluated (i.e. event + /// occurring when streamflow goes below threshold). It must be + /// provided if *q_thr* is provided. + /// /// transform: ``std::string``, optional /// The transformation to apply to both streamflow observations and /// predictions prior to the calculation of the *metrics*. @@ -41,51 +83,46 @@ namespace evalhyd /// /// exponent: ``double``, optional /// The value of the exponent n to use when the *transform* is the - /// power function. If not provided (or set to default value 1), - /// the streamflow observations and predictions remain untransformed. + /// power function. If not provided, the streamflow observations + /// and predictions remain untransformed. /// /// epsilon: ``double``, optional /// The value of the small constant ε to add to both the streamflow /// observations and predictions prior to the calculation of the /// *metrics* when the *transform* is the reciprocal function, the /// natural logarithm, or the power function with a negative exponent - /// (since none are defined for 0). If not provided (or set to default - /// value -9), one hundredth of the mean of the streamflow - /// observations is used as value for epsilon, as recommended by - /// `Pushpalatha et al. (2012) + /// (since none are defined for 0). If not provided, one hundredth of + /// the mean of the streamflow observations is used as value for + /// epsilon, as recommended by `Pushpalatha et al. (2012) /// <https://doi.org/10.1016/j.jhydrol.2011.11.055>`_. 
/// - /// t_msk: ``xt::xtensor<bool, 2>``, optional + /// t_msk: ``XB3``, optional /// Mask used to temporally subset of the whole streamflow time series /// (where True/False is used for the time steps to include/discard in - /// the subset). If provided, masks must feature the same number of - /// dimensions as observations and predictions, and it must - /// broadcastable with both of them. - /// shape: (subsets, time) + /// the subset). + /// shape: (series, subsets, time) /// /// .. seealso:: :doc:`../../functionalities/temporal-masking` /// - /// m_cdt: ``xt::xtensor<std::array<char, 32>, 1>``, optional + /// m_cdt: ``XS2``, optional /// Masking conditions to use to generate temporal subsets. Each /// condition consists in a string and can be specified on observed /// streamflow values/statistics (mean, median, quantile), or on time /// indices. If provided in combination with *t_msk*, the latter takes /// precedence. If not provided and neither is *t_msk*, no subset is - /// performed. If provided, there must be as many conditions as there - /// are time series of observations. - /// shape: (subsets,) + /// performed. + /// shape: (series, subsets) /// /// .. seealso:: :doc:`../../functionalities/conditional-masking` /// /// bootstrap: ``std::unordered_map<std::string, int>``, optional /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. - /// Three parameters are mandatory ('n_samples' the number of random - /// samples, 'len_sample' the length of one sample in number of years, + /// The parameters are: 'n_samples' the number of random samples, + /// 'len_sample' the length of one sample in number of years, /// and 'summary' the statistics to return to characterise the - /// sampling distribution), and one parameter is optional ('seed'). - /// If not provided, no bootstrapping is performed. If provided, - /// *dts* must also be provided. + /// sampling distribution). 
If not provided, no bootstrapping is + /// performed. If provided, *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// @@ -97,12 +134,22 @@ namespace evalhyd /// 21st of May 2007 at 4 in the afternoon is "2007-05-21 16:00:00"). /// If provided, it is only used if *bootstrap* is also provided. /// + /// seed: ``int``, optional + /// A value for the seed used by random generators. This parameter + /// guarantees the reproducibility of the metric values between calls. + /// + /// diagnostics: ``std::vector<std::string>``, optional + /// The sequence of evaluation diagnostics to be computed. + /// + /// .. seealso:: :doc:`../../functionalities/diagnostics` + /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` - /// The sequence of evaluation metrics computed - /// in the same order as given in *metrics*. - /// shape: (metrics,)<(series, subsets, samples)> + /// The sequence of evaluation metrics computed in the same order + /// as given in *metrics*, followed by the sequence of evaluation + /// diagnostics computed in the same order as given in *diagnostics*. + /// shape: (metrics+diagnostics,)<(series, subsets, samples, {components})> /// /// :Examples: /// @@ -132,24 +179,407 @@ namespace evalhyd /// /// .. 
code-block:: c++ /// - /// xt::xtensor<double, 2> msk = {{ 1, 1, 0, 1, 0 }}; + /// xt::xtensor<double, 3> msk = {{{ 1, 1, 0, 1, 0 }}}; /// /// evalhyd::evald(obs, prd, {"NSE"}, "none", 1, -9, msk); /// /// \endrst + template <class XD2, class XB3 = xt::xtensor<bool, 3>, + class XS2 = xt::xtensor<std::array<char, 32>, 2>> std::vector<xt::xarray<double>> evald( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_prd, + const xt::xexpression<XD2>& q_obs, + const xt::xexpression<XD2>& q_prd, const std::vector<std::string>& metrics, - const std::string& transform = "none", - const double exponent = 1, - double epsilon = -9, - const xt::xtensor<bool, 2>& t_msk = {}, - const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {}, - const std::unordered_map<std::string, int>& bootstrap = - {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, - const std::vector<std::string>& dts = {} - ); + const xt::xexpression<XD2>& q_thr = XD2({}), + xtl::xoptional<const std::string, bool> events = + xtl::missing<const std::string>(), + xtl::xoptional<const std::string, bool> transform = + xtl::missing<const std::string>(), + xtl::xoptional<double, bool> exponent = + xtl::missing<double>(), + xtl::xoptional<double, bool> epsilon = + xtl::missing<double>(), + const xt::xexpression<XB3>& t_msk = XB3({}), + const xt::xexpression<XS2>& m_cdt = XS2({}), + xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap = + xtl::missing<const std::unordered_map<std::string, int>>(), + const std::vector<std::string>& dts = {}, + xtl::xoptional<const int, bool> seed = + xtl::missing<const int>(), + xtl::xoptional<const std::vector<std::string>, bool> diagnostics = + xtl::missing<const std::vector<std::string>>() + ) + { + // check ranks of tensors + if (xt::get_rank<XD2>::value != 2) + { + throw std::runtime_error( + "observations and/or predictions and/or thresholds " + "are not two-dimensional" + ); + } + if (xt::get_rank<XB3>::value != 3) + { + throw 
std::runtime_error( + "temporal masks are not three-dimensional" + ); + } + + // retrieve real types of the expressions + const XD2& q_obs_ = q_obs.derived_cast(); + const XD2& q_prd_ = q_prd.derived_cast(); + const XD2& q_thr_ = q_thr.derived_cast(); + + const XB3& t_msk_ = t_msk.derived_cast(); + const XS2& m_cdt_ = m_cdt.derived_cast(); + + // check that the metrics/diagnostics to be computed are valid + utils::check_metrics( + metrics, + {"MAE", "MARE", "MSE", "RMSE", + "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", + // ------------------------------------------------------------ + // TODO: bring back when `xt::argsort` supports stable sorting + // so that the r_spearman component of KGENP and KGENP_D + // yields consistent results across compilers + // https://github.com/xtensor-stack/xtensor/issues/2677 + // "KGENP", "KGENP_D", + // ------------------------------------------------------------ + "CONT_TBL"} + ); + + if ( diagnostics.has_value() ) + { + utils::check_diags( + diagnostics.value(), + {"completeness"} + ); + } + + // check that optional parameters are valid + if (bootstrap.has_value()) + { + utils::check_bootstrap(bootstrap.value()); + } + + // get a seed for random generators + auto random_seed = utils::get_seed(seed); + + // check that data dimensions are compatible + // > time + if (q_obs_.shape(1) != q_prd_.shape(1)) + { + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + } + if (t_msk_.size() > 0) + { + if (q_obs_.shape(1) != t_msk_.shape(2)) + { + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + } + } + if (!dts.empty()) + { + if (q_obs_.shape(1) != dts.size()) + { + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + } + } + + // > series + if (q_obs_.shape(0) != 1) + { + throw std::runtime_error( + "observations contain more than one time series" + ); + } + + if (q_thr_.size() > 0) 
+ { + if (q_prd_.shape(0) != q_thr_.shape(0)) + { + throw std::runtime_error( + "predictions and thresholds feature different " + "numbers of series" + ); + } + } + + if (t_msk_.size() > 0) + { + if (q_prd_.shape(0) != t_msk_.shape(0)) + { + throw std::runtime_error( + "predictions and masks feature different " + "number of series" + ); + } + } + + if (m_cdt_.size() > 0) + { + if (q_prd_.shape(0) != m_cdt_.shape(0)) + { + throw std::runtime_error( + "predictions and masking conditions feature different " + "numbers of series" + ); + } + } + + // retrieve dimensions + std::size_t n_tim = q_prd_.shape(1); + + // generate masks from conditions if provided + auto gen_msk = [&]() + { + if ((t_msk_.size() < 1) && (m_cdt_.size() > 0)) + { + std::size_t n_srs = q_prd_.shape(0); + std::size_t n_msk = m_cdt_.shape(1); + + XB3 c_msk = xt::zeros<bool>({n_srs, n_msk, n_tim}); + + for (std::size_t s = 0; s < n_srs; s++) + { + for (std::size_t m = 0; m < n_msk; m++) + { + xt::view(c_msk, s, m) = + masks::generate_mask_from_conditions( + xt::view(m_cdt_, s, m), + xt::view(q_obs_, 0), + xt::view(q_prd_, s, xt::newaxis()) + ); + } + } + + return c_msk; + } + else + { + return XB3({}); + } + }; + const XB3 c_msk = gen_msk(); + + // apply streamflow transformation if requested + auto q_transform = [&](const XD2& q) + { + if (transform.has_value()) + { + if ( transform.value() == "sqrt" ) + { + return XD2(xt::sqrt(q)); + } + else if ( transform.value() == "inv" ) + { + if ( !epsilon.has_value() ) + { + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs_)() * 0.01; + } + + return XD2(1. 
/ (q + epsilon.value())); + } + else if ( transform.value() == "log" ) + { + if ( !epsilon.has_value() ) + { + // determine an epsilon value to avoid log zero + epsilon = xt::mean(q_obs_)() * 0.01; + } + + return XD2(xt::log(q + epsilon.value())); + } + else if ( transform.value() == "pow" ) + { + if ( exponent.has_value() ) + { + if ( exponent.value() == 1) + { + return q; + } + else if ( exponent.value() < 0 ) + { + if ( !epsilon.has_value() ) + { + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs_)() * 0.01; + } + + return XD2(xt::pow(q + epsilon.value(), + exponent.value())); + } + else + { + return XD2(xt::pow(q, exponent.value())); + } + } + else + { + throw std::runtime_error( + "missing exponent for power transformation" + ); + } + } + else + { + throw std::runtime_error( + "invalid streamflow transformation: " + + transform.value() + ); + } + } + else + { + return q; + } + }; + + const XD2& obs = q_transform(q_obs_); + const XD2& prd = q_transform(q_prd_); + const XD2& thr = q_transform(q_thr_); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> exp; + int summary; + + if (bootstrap.has_value()) + { + auto n_samples = bootstrap.value().find("n_samples")->second; + auto len_sample = bootstrap.value().find("len_sample")->second; + summary = bootstrap.value().find("summary")->second; + + if (dts.empty()) + { + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + } + + exp = uncertainty::bootstrap( + dts, n_samples, len_sample, random_seed + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + summary = 0; + xt::xtensor<int, 1> all = xt::arange(n_tim); + exp.push_back(xt::keep(all)); + } + + // instantiate determinist evaluator + determinist::Evaluator<XD2, XB3> evaluator( + obs, prd, thr, events, + t_msk_.size() > 0 ? t_msk_: (m_cdt_.size() > 0 ? 
c_msk : t_msk_), + exp + ); + + // retrieve or compute requested metrics + std::vector<xt::xarray<double>> r; + + for ( const auto& metric : metrics ) + { + if ( metric == "MAE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_MAE(), summary) + ); + } + if ( metric == "MARE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_MARE(), summary) + ); + } + if ( metric == "MSE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_MSE(), summary) + ); + } + if ( metric == "RMSE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_RMSE(), summary) + ); + } + else if ( metric == "NSE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_NSE(), summary) + ); + } + else if ( metric == "KGE" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGE(), summary) + ); + } + else if ( metric == "KGE_D" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGE_D(), summary) + ); + } + else if ( metric == "KGEPRIME" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGEPRIME(), summary) + ); + } + else if ( metric == "KGEPRIME_D" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGEPRIME_D(), summary) + ); + } + else if ( metric == "KGENP" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGENP(), summary) + ); + } + else if ( metric == "KGENP_D" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_KGENP_D(), summary) + ); + } + else if ( metric == "CONT_TBL" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_CONT_TBL(), summary) + ); + } + } + + if ( diagnostics.has_value() ) + { + for ( const auto& diagnostic : diagnostics.value() ) + { + if ( diagnostic == "completeness" ) + { + r.emplace_back( + evaluator.get_completeness() + ); + } + } + } + + return r; + }; } #endif //EVALHYD_EVALD_HPP diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 
6fbce8f1bf4bbbcd6dcc9cf3de92e0bd9c1e93b8..d7e1d4279985fec564b21f41b21043edf22768ad 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -1,11 +1,23 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + #ifndef EVALHYD_EVALP_HPP #define EVALHYD_EVALP_HPP #include <unordered_map> #include <vector> +#include <xtl/xoptional.hpp> +#include <xtensor/xexpression.hpp> #include <xtensor/xtensor.hpp> #include <xtensor/xarray.hpp> +#include <xtensor/xadapt.hpp> + +#include "detail/utils.hpp" +#include "detail/masks.hpp" +#include "detail/uncertainty.hpp" +#include "detail/probabilist/evaluator.hpp" namespace evalhyd @@ -14,16 +26,35 @@ namespace evalhyd /// /// \rst /// + /// :Template Parameters: + /// + /// XD2: Any 2-dimensional container class storing numeric elements + /// (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``, + /// ``xt::rtensor<double, 2>``, etc.). + /// + /// XD4: Any 4-dimensional container class storing numeric elements + /// (e.g. ``xt::xtensor<double, 4>``, ``xt::pytensor<double, 4>``, + /// ``xt::rtensor<double, 4>``, etc.). + /// + /// XB4: Any 4-dimensional container class storing boolean elements + /// (e.g. ``xt::xtensor<bool, 4>``, ``xt::pytensor<bool, 4>``, + /// ``xt::rtensor<bool, 4>``, etc.). + /// + /// XS2: Any 2-dimensional container class storing string elements + /// (e.g. ``xt::xtensor<std::array<char, 32>, 2>``, + /// ``xt::pytensor<std::array<char, 32>, 2>``, + /// ``xt::rtensor<std::array<char, 32>, 2>``, etc.). + /// /// :Parameters: /// - /// q_obs: ``xt::xtensor<double, 2>`` + /// q_obs: ``XD2`` /// Streamflow observations. Time steps with missing observations /// must be assigned `NAN` values. Those time steps will be ignored /// both in the observations and the predictions before the /// *metrics* are computed. 
/// shape: (sites, time) /// - /// q_prd: ``xt::xtensor<double, 4>`` + /// q_prd: ``XD4`` /// Streamflow predictions. Time steps with missing predictions /// must be assigned `NAN` values. Those time steps will be ignored /// both in the observations and the predictions before the @@ -33,11 +64,26 @@ namespace evalhyd /// metrics: ``std::vector<std::string>`` /// The sequence of evaluation metrics to be computed. /// - /// q_thr: ``xt::xtensor<double, 2>``, optional - /// Streamflow exceedance threshold(s). + /// .. seealso:: :doc:`../../metrics/probabilistic` + /// + /// q_thr: ``XD2``, optional + /// Streamflow exceedance threshold(s). If provided, *events* must + /// also be provided. /// shape: (sites, thresholds) /// - /// t_msk: ``xt::xtensor<bool, 4>``, optional + /// events: ``std::string``, optional + /// The type of streamflow events to consider for threshold + /// exceedance-based metrics. It can either be set as "high" when + /// flooding conditions/high flow events are evaluated (i.e. event + /// occurring when streamflow goes above threshold) or as "low" when + /// drought conditions/low flow events are evaluated (i.e. event + /// occurring when streamflow goes below threshold). It must be + /// provided if *q_thr* is provided. + /// + /// c_lvl: ``std::vector<double>``, optional + /// Confidence interval(s). + /// + /// t_msk: ``XB4``, optional /// Mask(s) used to generate temporal subsets of the whole streamflow /// time series (where True/False is used for the time steps to /// include/discard in a given subset). If not provided and neither @@ -48,15 +94,14 @@ namespace evalhyd /// /// .. seealso:: :doc:`../../functionalities/temporal-masking` /// - /// m_cdt: ``xt::xtensor<std::array<char, 32>, 2>``, optional + /// m_cdt: ``XS2``, optional /// Masking conditions to use to generate temporal subsets. 
Each /// condition consists in a string and can be specified on /// observed/predicted streamflow values/statistics (mean, median, /// quantile), or on time indices. If provided in combination with /// *t_msk*, the latter takes precedence. If not provided and neither /// is *t_msk*, no subset is performed and only one set of metrics is - /// returned corresponding to the whole time series. If provided, as - /// many sets of metrics are returned as they are conditions provided. + /// returned corresponding to the whole time series. /// shape: (sites, subsets) /// /// .. seealso:: :doc:`../../functionalities/conditional-masking` @@ -64,12 +109,11 @@ namespace evalhyd /// bootstrap: ``std::unordered_map<std::string, int>``, optional /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. - /// Three parameters are mandatory ('n_samples' the number of random - /// samples, 'len_sample' the length of one sample in number of years, + /// The parameters are: 'n_samples' the number of random samples, + /// 'len_sample' the length of one sample in number of years, /// and 'summary' the statistics to return to characterise the - /// sampling distribution), and one parameter is optional ('seed'). - /// If not provided, no bootstrapping is performed. If provided, - /// *dts* must also be provided. + /// sampling distribution). If not provided, no bootstrapping is + /// performed. If provided, *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// @@ -81,13 +125,23 @@ namespace evalhyd /// 21st of May 2007 at 4 in the afternoon is "2007-05-21 16:00:00"). /// If provided, it is only used if *bootstrap* is also provided. /// + /// seed: ``int``, optional + /// A value for the seed used by random generators. This parameter + /// guarantees the reproducibility of the metric values between calls. 
+ /// + /// diagnostics: ``std::vector<std::string>``, optional + /// The sequence of evaluation diagnostics to be computed. + /// + /// .. seealso:: :doc:`../../functionalities/diagnostics` + /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` /// The sequence of evaluation metrics computed in the same order - /// as given in *metrics*. - /// shape: (metrics,)<(sites, lead times, subsets, samples, - /// {quantiles,} {thresholds,} {components})> + /// as given in *metrics*, followed by the sequence of evaluation + /// diagnostics computed in the same order as given in *diagnostics*. + /// shape: (metrics+diagnostics,)<(sites, lead times, subsets, samples, + /// {quantiles,} {thresholds,} {components,} {ranks,} {intervals})> /// /// :Examples: /// @@ -115,17 +169,414 @@ namespace evalhyd /// evalhyd::evalp(obs, prd, {"CRPS"}); /// /// \endrst + template <class XD2, class XD4, class XB4 = xt::xtensor<bool, 4>, + class XS2 = xt::xtensor<std::array<char, 32>, 2>> std::vector<xt::xarray<double>> evalp( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 4>& q_prd, + const xt::xexpression<XD2>& q_obs, + const xt::xexpression<XD4>& q_prd, const std::vector<std::string>& metrics, - const xt::xtensor<double, 2>& q_thr = {}, - const xt::xtensor<bool, 4>& t_msk = {}, - const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {}, - const std::unordered_map<std::string, int>& bootstrap = - {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, - const std::vector<std::string>& dts = {} - ); + const xt::xexpression<XD2>& q_thr = XD2({}), + xtl::xoptional<const std::string, bool> events = + xtl::missing<const std::string>(), + const std::vector<double>& c_lvl = {}, + const xt::xexpression<XB4>& t_msk = XB4({}), + const xt::xexpression<XS2>& m_cdt = XS2({}), + xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap = + xtl::missing<const std::unordered_map<std::string, int>>(), + const std::vector<std::string>& dts = {}, + 
xtl::xoptional<const int, bool> seed = + xtl::missing<const int>(), + xtl::xoptional<const std::vector<std::string>, bool> diagnostics = + xtl::missing<const std::vector<std::string>>() + ) + { + // check ranks of tensors + if (xt::get_rank<XD2>::value != 2) + { + throw std::runtime_error( + "observations and/or thresholds are not two-dimensional" + ); + } + if (xt::get_rank<XD4>::value != 4) + { + throw std::runtime_error( + "predictions are not four-dimensional" + ); + } + if (xt::get_rank<XB4>::value != 4) + { + throw std::runtime_error( + "temporal masks are not four-dimensional" + ); + } + + // retrieve real types of the expressions + const XD2& q_obs_ = q_obs.derived_cast(); + const XD4& q_prd_ = q_prd.derived_cast(); + const XD2& q_thr_ = q_thr.derived_cast(); + + const XB4& t_msk_ = t_msk.derived_cast(); + const XS2& m_cdt_ = m_cdt.derived_cast(); + + // adapt vector to tensor + const xt::xtensor<double, 1> c_lvl_ = xt::adapt(c_lvl); + + // check that the metrics/diagnostics to be computed are valid + utils::check_metrics( + metrics, + {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "CRPS_FROM_ECDF", + "QS", "CRPS_FROM_QS", + "POD", "POFD", "FAR", "CSI", "ROCSS", + "RANK_HIST", "DS", "AS", + "CR", "AW", "AWN", "AWI", "WS", "WSS", + "ES"} + ); + + if ( diagnostics.has_value() ) + { + utils::check_diags( + diagnostics.value(), + {"completeness"} + ); + } + + // check optional parameters + if (bootstrap.has_value()) + { + utils::check_bootstrap(bootstrap.value()); + } + + // get a seed for random generators + auto random_seed = utils::get_seed(seed); + + // check that data dimensions are compatible + // > time + if (q_obs_.shape(1) != q_prd_.shape(3)) + { + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + } + if (t_msk_.size() > 0) + { + if (q_obs_.shape(1) != t_msk_.shape(3)) + { + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + } + } + 
if (!dts.empty()) + { + if (q_obs_.shape(1) != dts.size()) + { + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + } + } + + // > leadtimes + if (t_msk_.size() > 0) + { + if (q_prd_.shape(1) != t_msk_.shape(1)) + { + throw std::runtime_error( + "predictions and temporal masks feature different " + "numbers of lead times" + ); + } + } + + // > sites + if (q_obs_.shape(0) != q_prd_.shape(0)) + { + throw std::runtime_error( + "observations and predictions feature different " + "numbers of sites" + ); + } + + if (q_thr_.size() > 0) + { + if (q_obs_.shape(0) != q_thr_.shape(0)) + { + throw std::runtime_error( + "observations and thresholds feature different " + "numbers of sites" + ); + } + } + + if (t_msk_.size() > 0) + { + if (q_obs_.shape(0) != t_msk_.shape(0)) + { + throw std::runtime_error( + "observations and temporal masks feature different " + "numbers of sites" + ); + } + } + + if (m_cdt_.size() > 0) + { + if (q_obs_.shape(0) != m_cdt_.shape(0)) + { + throw std::runtime_error( + "observations and masking conditions feature different " + "numbers of sites" + ); + } + } + + // retrieve dimensions + std::size_t n_tim = q_prd_.shape(3); + + // generate masks from conditions if provided + auto gen_msk = [&]() + { + if ((t_msk_.size() < 1) && (m_cdt_.size() > 0)) + { + std::size_t n_sit = q_prd_.shape(0); + std::size_t n_ltm = q_prd_.shape(1); + std::size_t n_msk = m_cdt_.shape(1); + + XB4 c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); + + for (std::size_t s = 0; s < n_sit; s++) + { + for (std::size_t l = 0; l < n_ltm; l++) + { + for (std::size_t m = 0; m < n_msk; m++) + { + xt::view(c_msk, s, l, m) = + masks::generate_mask_from_conditions( + xt::view(m_cdt_, s, m), + xt::view(q_obs_, s), + xt::view(q_prd_, s, l) + ); + } + } + } + + return c_msk; + } + else + { + return XB4({}); + } + }; + const XB4 c_msk = gen_msk(); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> 
b_exp; + int summary; + + if (bootstrap.has_value()) + { + auto n_samples = bootstrap.value().find("n_samples")->second; + auto len_sample = bootstrap.value().find("len_sample")->second; + summary = bootstrap.value().find("summary")->second; + + if (dts.empty()) + { + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + } + + b_exp = uncertainty::bootstrap( + dts, n_samples, len_sample, random_seed + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + summary = 0; + xt::xtensor<int, 1> all = xt::arange(n_tim); + b_exp.push_back(xt::keep(all)); + } + + // instantiate probabilist evaluator + probabilist::Evaluator<XD2, XD4, XB4> evaluator( + q_obs_, q_prd_, q_thr_, c_lvl_, events, + t_msk_.size() > 0 ? t_msk_: (m_cdt_.size() > 0 ? c_msk : t_msk_), + b_exp, + random_seed + ); + + // initialise data structure for outputs + std::vector<xt::xarray<double>> r; + + for ( const auto& metric : metrics ) + { + if ( metric == "BS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_BS(), summary) + ); + } + else if ( metric == "BSS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_BSS(), summary) + ); + } + else if ( metric == "BS_CRD" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_BS_CRD(), summary) + ); + } + else if ( metric == "BS_LBD" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_BS_LBD(), summary) + ); + } + else if ( metric == "REL_DIAG" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_REL_DIAG(), summary) + ); + } + else if ( metric == "CRPS_FROM_BS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CRPS_FROM_BS(), summary) + ); + } + else if ( metric == "CRPS_FROM_ECDF" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CRPS_FROM_ECDF(), summary) + ); + } + else if ( metric == "QS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_QS(), summary) + ); + 
} + else if ( metric == "CRPS_FROM_QS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CRPS_FROM_QS(), summary) + ); + } + else if ( metric == "POD" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_POD(), summary) + ); + } + else if ( metric == "POFD" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_POFD(), summary) + ); + } + else if ( metric == "FAR" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_FAR(), summary) + ); + } + else if ( metric == "CSI" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CSI(), summary) + ); + } + else if ( metric == "ROCSS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_ROCSS(), summary) + ); + } + else if ( metric == "RANK_HIST" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_RANK_HIST(), summary) + ); + } + else if ( metric == "DS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_DS(), summary) + ); + } + else if ( metric == "AS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_AS(), summary) + ); + } + else if ( metric == "CR" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CR(), summary) + ); + } + else if ( metric == "AW" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_AW(), summary) + ); + } + else if ( metric == "AWN" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_AWN(), summary) + ); + } + else if ( metric == "AWI" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_AWI(), summary) + ); + } + else if ( metric == "WS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_WS(), summary) + ); + } + else if ( metric == "WSS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_WSS(), summary) + ); + } + else if ( metric == "ES" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_ES(), summary) + ); + } + } + + if ( diagnostics.has_value() ) + { + for ( const auto& diagnostic : 
diagnostics.value() ) + { + if ( diagnostic == "completeness" ) + { + r.emplace_back( + evaluator.get_completeness() + ); + } + } + } + + return r; + } } #endif //EVALHYD_EVALP_HPP diff --git a/src/determinist/evald.cpp b/src/determinist/evald.cpp deleted file mode 100644 index 0ce84b6f9d6649c0f95b0ff8ac978552cc3c2df1..0000000000000000000000000000000000000000 --- a/src/determinist/evald.cpp +++ /dev/null @@ -1,265 +0,0 @@ -#include <unordered_map> -#include <vector> -#include <array> -#include <stdexcept> -#include <xtensor/xexpression.hpp> -#include <xtensor/xarray.hpp> -#include <xtensor/xscalar.hpp> - -#include "evalhyd/evald.hpp" - -#include "utils.hpp" -#include "masks.hpp" -#include "maths.hpp" -#include "uncertainty.hpp" -#include "determinist/evaluator.hpp" - -namespace eh = evalhyd; - -namespace evalhyd -{ - std::vector<xt::xarray<double>> evald( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_prd, - const std::vector<std::string>& metrics, - const std::string& transform, - const double exponent, - double epsilon, - const xt::xtensor<bool, 2>& t_msk, - const xt::xtensor<std::array<char, 32>, 1>& m_cdt, - const std::unordered_map<std::string, int>& bootstrap, - const std::vector<std::string>& dts - ) - { - // check that the metrics to be computed are valid - utils::check_metrics( - metrics, - {"RMSE", "NSE", "KGE", "KGEPRIME"} - ); - - // check that optional parameters are valid - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(1)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal 
lengths" - ); - // > series - if (q_obs.shape(0) != 1) - throw std::runtime_error( - "observations contain more than one time series" - ); - - // retrieve dimensions - std::size_t n_tim = q_obs.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) : - (m_cdt.size() > 0 ? m_cdt.shape(0) : 1); - - // initialise a mask if none provided - // (corresponding to no temporal subset) - auto gen_msk = [&]() { - // if t_msk provided, it takes priority - if (t_msk.size() > 0) - return t_msk; - // else if m_cdt provided, use them to generate t_msk - else if (m_cdt.size() > 0) - { - xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); - - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, m) = - eh::masks::generate_mask_from_conditions( - m_cdt[0], xt::view(q_obs, 0), q_prd - ); - - return c_msk; - } - // if neither t_msk nor m_cdt provided, generate dummy mask - else - return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; - }; - - auto msk = gen_msk(); - - // apply streamflow transformation if requested - auto q_transform = [&](const xt::xtensor<double, 2>& q) - { - if ( transform == "none" || (transform == "pow" && exponent == 1)) - { - return q; - } - else if ( transform == "sqrt" ) - { - return xt::eval(xt::sqrt(q)); - } - else if ( transform == "inv" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(1. 
/ (q + epsilon)); - } - else if ( transform == "log" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid log zero - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::log(q + epsilon)); - } - else if ( transform == "pow" ) - { - if ( exponent < 0 ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::pow(q + epsilon, exponent)); - } - else - { - return xt::eval(xt::pow(q, exponent)); - } - } - else - { - throw std::runtime_error( - "invalid streamflow transformation: " + transform - ); - } - }; - - auto obs = q_transform(q_obs); - auto prd = q_transform(q_prd); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) && (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - exp.push_back(xt::keep(all)); - } - - // instantiate determinist evaluator - eh::determinist::Evaluator evaluator(obs, prd, msk, exp); - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elt across metrics - elt["RMSE"] = {"quad_err"}; - elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; - elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - - // register nested metrics (i.e. 
metric dependent on another metric) - // TODO - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // pre-compute required elt - for ( const auto& element : req_elt ) - { - if ( element == "mean_obs" ) - evaluator.calc_mean_obs(); - else if ( element == "mean_prd" ) - evaluator.calc_mean_prd(); - else if ( element == "quad_err" ) - evaluator.calc_quad_err(); - else if ( element == "quad_obs" ) - evaluator.calc_quad_obs(); - else if ( element == "quad_prd" ) - evaluator.calc_quad_prd(); - else if ( element == "r_pearson" ) - evaluator.calc_r_pearson(); - else if ( element == "alpha" ) - evaluator.calc_alpha(); - else if ( element == "bias" ) - evaluator.calc_bias(); - } - - // pre-compute required dep - for ( const auto& dependency : req_dep ) - { - // TODO - } - - // retrieve or compute requested metrics - std::vector<xt::xarray<double>> r; - - auto summary = bootstrap.find("summary")->second; - - for ( const auto& metric : metrics ) - { - if ( metric == "RMSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_RMSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); - } - else if ( metric == "NSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_NSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); - } - else if ( metric == "KGE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); - } - else if ( metric == "KGEPRIME" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGEPRIME(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); - } - } - - return r; - } -} diff --git 
a/src/determinist/evaluator.hpp b/src/determinist/evaluator.hpp deleted file mode 100644 index 0fdab32c129ec69faa22bc9a0f4f93e6646fce1f..0000000000000000000000000000000000000000 --- a/src/determinist/evaluator.hpp +++ /dev/null @@ -1,484 +0,0 @@ -#ifndef EVALHYD_DETERMINIST_EVALUATOR_HPP -#define EVALHYD_DETERMINIST_EVALUATOR_HPP - -#include <vector> - -#include <xtensor/xexpression.hpp> -#include <xtensor/xtensor.hpp> - -#include "../maths.hpp" - -namespace evalhyd -{ - namespace determinist - { - class Evaluator - { - private: - // members for input data - const xt::xtensor<double, 2>& q_obs; - const xt::xtensor<double, 2>& q_prd; - xt::xtensor<bool, 3> t_msk; - const std::vector<xt::xkeep_slice<int>>& b_exp; - - // members for dimensions - std::size_t n_tim; - std::size_t n_msk; - std::size_t n_srs; - std::size_t n_exp; - std::vector<std::size_t> inner_dims; - std::vector<std::size_t> inter_dims; - std::vector<std::size_t> mean_dims; - std::vector<std::size_t> final_dims; - - // members for computational elements - xt::xtensor<double, 4> mean_obs; - xt::xtensor<double, 4> mean_prd; - xt::xtensor<double, 2> quad_err; - xt::xtensor<double, 4> quad_obs; - xt::xtensor<double, 4> quad_prd; - xt::xtensor<double, 3> r_pearson; - xt::xtensor<double, 3> alpha; - xt::xtensor<double, 3> bias; - - public: - // constructor method - Evaluator(const xt::xtensor<double, 2>& obs, - const xt::xtensor<double, 2>& prd, - const xt::xtensor<bool, 2>& msk, - const std::vector<xt::xkeep_slice<int>>& exp) : - q_obs{obs}, q_prd{prd}, b_exp{exp} - { - // determine size for recurring dimensions - n_tim = q_prd.shape(1); - n_srs = q_prd.shape(0); - n_msk = msk.shape(0); - n_exp = b_exp.size(); - - // determine dimensions for inner components, intermediate - // elements, for time average elements, and for final metrics: - // -> inner components shape: (samples, subsets, series) - inner_dims = {n_msk, n_exp, n_srs}; - // -> intermediate elements shape: (samples, subsets, series, time) - 
inter_dims = {n_msk, n_exp, n_srs, n_tim}; - // -> time average elements shape: (samples, subsets, series, 1) - mean_dims = {n_msk, n_exp, n_srs, 1}; - // -> final metrics shape: (series, subsets, samples) - final_dims = {n_srs, n_msk, n_exp}; - - // drop time steps where observations or predictions are NaN - auto obs_nan = xt::isnan(q_obs); - auto prd_nan = xt::isnan(q_prd); - - t_msk = xt::ones<bool>({n_msk, n_srs, n_tim}); - xt::view(t_msk, xt::all()) = - xt::view(msk, xt::all(), xt::newaxis(), xt::all()); - for (int m = 0; m < n_msk; m++) { - xt::view(t_msk, m) = - xt::where(obs_nan | prd_nan, - false, xt::view(t_msk, m)); - } - }; - - // members for evaluation metrics - xt::xtensor<double, 3> RMSE; - xt::xtensor<double, 3> NSE; - xt::xtensor<double, 3> KGE; - xt::xtensor<double, 3> KGEPRIME; - - // methods to compute elements - void calc_mean_obs(); - void calc_mean_prd(); - void calc_quad_err(); - void calc_quad_obs(); - void calc_quad_prd(); - void calc_r_pearson(); - void calc_alpha(); - void calc_bias(); - - // methods to compute metrics - void calc_RMSE(); - void calc_NSE(); - void calc_KGE(); - void calc_KGEPRIME(); - }; - - // Compute the mean of the observations. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \assign mean_obs: - // Mean observed streamflow. - // shape: (subsets, samples, series, 1) - void Evaluator::calc_mean_obs() - { - mean_obs = xt::zeros<double>(mean_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - // apply the bootstrap sampling - auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); - xt::view(mean_obs, m, e) = - xt::nanmean(obs, -1, xt::keep_dims); - } - } - } - - // Compute the mean of the predictions. - // - // \require q_prd: - // Streamflow predictions. 
- // shape: (series, time) - // \assign mean_prd: - // Mean predicted streamflow. - // shape: (subsets, samples, series, 1) - void Evaluator::calc_mean_prd() - { - mean_prd = xt::zeros<double>(mean_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - // apply the bootstrap sampling - auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); - xt::view(mean_prd, m, e) = - xt::nanmean(prd, -1, xt::keep_dims); - } - } - } - - // Compute the quadratic error between observations and predictions. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \require q_prd: - // Streamflow predictions. - // shape: (series, time) - // \assign quad_err: - // Quadratic errors between observations and predictions. - // shape: (series, time) - - void Evaluator::calc_quad_err() - { - quad_err = xt::square(q_obs - q_prd); - } - - // Compute the quadratic error between observations and mean observation. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \require mean_obs: - // Mean observed streamflow. - // shape: (samples, series, 1) - // \assign quad_obs: - // Quadratic errors between observations and mean observation. - // shape: (subsets, samples, series, time) - void Evaluator::calc_quad_obs() - { - quad_obs = xt::zeros<double>(inter_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN); - - for (int e = 0; e < n_exp; e++) { - xt::view(quad_obs, m, e) = xt::square( - obs_masked - xt::view(mean_obs, m, e) - ); - } - } - } - - // Compute the quadratic error between predictions and mean prediction. - // - // \require q_prd: - // Streamflow predictions. 
- // shape: (series, time) - // \require mean_prd: - // Mean predicted streamflow. - // shape: (samples, series, 1) - // \assign quad_prd: - // Quadratic errors between predictions and mean prediction. - // shape: (subsets, samples, series, time) - void Evaluator::calc_quad_prd() - { - quad_prd = xt::zeros<double>(inter_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN); - - for (int e = 0; e < n_exp; e++) { - xt::view(quad_prd, m, e) = xt::square( - prd_masked - xt::view(mean_prd, m, e) - ); - } - } - } - - // Compute the Pearson correlation coefficient. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \require q_prd: - // Streamflow predictions. - // shape: (series, time) - // \require mean_obs: - // Mean observed streamflow. - // shape: (samples, series, 1) - // \require mean_prd: - // Mean predicted streamflow. - // shape: (samples, series, 1) - // \require quad_obs: - // Quadratic errors between observations and mean observation. - // shape: (samples, series, time) - // \require quad_prd: - // Quadratic errors between predictions and mean prediction. - // shape: (samples, series, time) - // \assign r_pearson: - // Pearson correlation coefficients. 
- // shape: (subsets, samples, series) - void Evaluator::calc_r_pearson() - { - // calculate error in timing and dynamics $r_{pearson}$ - // (Pearson's correlation coefficient) - r_pearson = xt::zeros<double>(inner_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN); - auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); - auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); - auto r_num = xt::nansum( - (prd - xt::view(mean_prd, m, e)) - * (obs - xt::view(mean_obs, m, e)), - -1 - ); - - auto prd2 = xt::view(quad_prd, m, e, xt::all(), b_exp[e]); - auto obs2 = xt::view(quad_obs, m, e, xt::all(), b_exp[e]); - auto r_den = xt::sqrt( - xt::nansum(prd2, -1) * xt::nansum(obs2, -1) - ); - - xt::view(r_pearson, m, e) = r_num / r_den; - } - } - } - - // Compute alpha. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \require q_prd: - // Streamflow predictions. - // shape: (series, time) - // \require mean_obs: - // Mean observed streamflow. - // shape: (samples, series, 1) - // \require mean_prd: - // Mean predicted streamflow. - // shape: (samples, series, 1) - // \assign alpha: - // Alphas, ratios of standard deviations. 
- // shape: (subsets, samples, series) - void Evaluator::calc_alpha() - { - // calculate error in spread of flow $alpha$ - alpha = xt::zeros<double>(inner_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN); - auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); - auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); - xt::view(alpha, m, e) = - evalhyd::maths::nanstd(prd, xt::view(mean_prd, m, e)) - / evalhyd::maths::nanstd(obs, xt::view(mean_obs, m, e)); - } - } - } - - // Compute the bias. - // - // \require q_obs: - // Streamflow observations. - // shape: (series, time) - // \require q_prd: - // Streamflow predictions. - // shape: (series, time) - // \assign bias: - // Biases. - // shape: (subsets, samples, series) - void Evaluator::calc_bias() - { - // calculate $bias$ - bias = xt::zeros<double>(inner_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN); - auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - auto prd = xt::view(prd_masked, xt::all(), b_exp[e]); - auto obs = xt::view(obs_masked, xt::all(), b_exp[e]); - xt::view(bias, m, e) = - xt::nansum(prd, -1) / xt::nansum(obs, -1); - } - } - } - - // Compute the root-mean-square error (RMSE). - // - // \require quad_err: - // Quadratic errors between observations and predictions. - // shape: (series, time) - // \assign RMSE: - // Root-mean-square errors. 
- // shape: (series, subsets, samples) - void Evaluator::calc_RMSE() - { - // compute RMSE - RMSE = xt::zeros<double>(final_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto quad_err_masked = xt::where(xt::view(t_msk, m), quad_err, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - auto err2 = xt::view(quad_err_masked, xt::all(), b_exp[e]); - xt::view(RMSE, xt::all(), m, e) = xt::sqrt(xt::nanmean(err2, -1)); - } - } - } - - // Compute the Nash-Sutcliffe Efficiency (NSE). - // - // \require quad_err: - // Quadratic errors between observations and predictions. - // shape: (series, time) - // \require quad_obs: - // Quadratic errors between observations and mean observation. - // shape: (samples, series, time) - // \assign NSE: - // Nash-Sutcliffe efficiencies. - // shape: (series, subsets, samples) - void Evaluator::calc_NSE() - { - NSE = xt::zeros<double>(final_dims); - for (int m = 0; m < n_msk; m++) { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto quad_err_masked = xt::where(xt::view(t_msk, m), quad_err, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) { - // compute squared errors operands - auto err2 = xt::view(quad_err_masked, xt::all(), b_exp[e]); - xt::xtensor<double, 1> f_num = - xt::nansum(err2, -1); - auto obs2 = xt::view(quad_obs, m, e, xt::all(), b_exp[e]); - xt::xtensor<double, 1> f_den = - xt::nansum(obs2, -1); - - // compute NSE - xt::view(NSE, xt::all(), m, e) = 1 - (f_num / f_den); - } - } - } - - // Compute the Kling-Gupta Efficiency (KGE). - // - // \require r_pearson: - // Pearson correlation coefficients. - // shape: (samples, series) - // \require alpha: - // Alphas, ratios of standard deviations. - // shape: (samples, series) - // \require bias: - // Biases. - // shape: (samples, series) - // \assign KGE: - // Kling-Gupta efficiencies. 
- // shape: (series, subsets, samples) - void Evaluator::calc_KGE() - { - KGE = xt::zeros<double>(final_dims); - for (int m = 0; m < n_msk; m++) { - for (int e = 0; e < n_exp; e++) { - // compute KGE - xt::view(KGE, xt::all(), m, e) = 1 - xt::sqrt( - xt::square(xt::view(r_pearson, m, e) - 1) - + xt::square(xt::view(alpha, m, e) - 1) - + xt::square(xt::view(bias, m, e) - 1) - ); - } - } - } - - // Compute the modified Kling-Gupta Efficiency (KGEPRIME). - // - // \require mean_obs: - // Mean observed streamflow. - // shape: (samples, series, 1) - // \require mean_prd: - // Mean predicted streamflow. - // shape: (samples, series, 1) - // \require r_pearson: - // Pearson correlation coefficients. - // shape: (samples, series) - // \require alpha: - // Alphas, ratios of standard deviations. - // shape: (samples, series) - // \require bias: - // Biases. - // shape: (samples, series) - // \assign KGEPRIME: - // Modified Kling-Gupta efficiencies. - // shape: (series, subsets, samples) - void Evaluator::calc_KGEPRIME() - { - KGEPRIME = xt::zeros<double>(final_dims); - for (int m = 0; m < n_msk; m++) { - for (int e = 0; e < n_exp; e++) { - // calculate error in spread of flow $gamma$ - auto gamma = xt::view(alpha, m, e) - * (xt::view(mean_obs, m, e, xt::all(), 0) - / xt::view(mean_prd, m, e, xt::all(), 0)); - - // compute KGEPRIME - xt::view(KGEPRIME, xt::all(), m, e) = 1 - xt::sqrt( - xt::square(xt::view(r_pearson, m, e) - 1) - + xt::square(gamma - 1) - + xt::square(xt::view(bias, m, e) - 1) - ); - } - } - } - } -} - -#endif //EVALHYD_DETERMINIST_EVALUATOR_HPP diff --git a/src/maths.hpp b/src/maths.hpp deleted file mode 100644 index 7fccf315e87c670a4f8d5b64c64d9c54bada7113..0000000000000000000000000000000000000000 --- a/src/maths.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef EVALHYD_MATHS_HPP -#define EVALHYD_MATHS_HPP - -#include <xtensor/xtensor.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xsort.hpp> -#include <xtensor/xbuilder.hpp> -#include <xtensor/xutils.hpp> - 
-#include <cmath> - -namespace evalhyd -{ - namespace maths - { - // TODO: substitute with `xt::stddev` when fixed for `xt::rtensor` - // (see https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/1) - // function to calculate standard deviation on last axis of n-dim expressions - template <class A1, class A2> - inline auto nanstd(A1&& arr, A2&& mean_arr) - { - return xt::sqrt( - xt::nanmean(xt::square(xt::abs(arr - mean_arr)), -1) - ); - } - - // function to calculate quantile on 1-dim expressions - inline double quantile(const xt::xtensor<double, 1>& t, double p) - { - std::size_t n = t.size(); - - // compute virtual index - auto virtual_idx = (double(n) - 1) * p; - - if (std::fmod(virtual_idx, 1) == 0) - // virtual index is an integer - { - std::size_t i = {static_cast<unsigned long long>(virtual_idx)}; - std::array<std::size_t, 1> kth = {i}; - auto values = xt::partition(t, kth); - return xt::mean(xt::view(values, xt::range(i, i + 1)))(); - } - else - // virtual index is not an integer - { - // determine indices before and after virtual index - std::size_t prv_idx = std::floor(virtual_idx); - std::size_t nxt_idx = prv_idx + 1; - - // deal with edge cases - if (virtual_idx > double(n) - 1) - { - prv_idx = n - 1; - nxt_idx = n - 1; - } - if (virtual_idx < 0) - { - prv_idx = 0; - nxt_idx = 0; - } - - std::array<std::size_t, 2> kth = {prv_idx, nxt_idx}; - auto values = xt::partition(t, kth); - auto vw = xt::view(values, xt::range(prv_idx, nxt_idx + 1)); - - // perform linear interpolation to determine quantile - auto gamma = virtual_idx - double(prv_idx); - auto prv = xt::amin(vw); - auto nxt = xt::amax(vw); - - return (prv + (nxt - prv) * gamma)(); - } - } - } -} - -#endif //EVALHYD_MATHS_HPP diff --git a/src/probabilist/evalp.cpp b/src/probabilist/evalp.cpp deleted file mode 100644 index 48f709d6feef3bfd83b848b726ab1154d48c030d..0000000000000000000000000000000000000000 --- a/src/probabilist/evalp.cpp +++ /dev/null @@ -1,286 +0,0 @@ -#include <utility> 
-#include <unordered_map> -#include <vector> -#include <array> -#include <stdexcept> -#include <xtensor/xtensor.hpp> -#include <xtensor/xarray.hpp> -#include <xtensor/xview.hpp> - -#include "evalhyd/evalp.hpp" -#include "utils.hpp" -#include "masks.hpp" -#include "maths.hpp" -#include "uncertainty.hpp" -#include "probabilist/evaluator.hpp" - -namespace eh = evalhyd; - -namespace evalhyd -{ - std::vector<xt::xarray<double>> evalp( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 4>& q_prd, - const std::vector<std::string>& metrics, - const xt::xtensor<double, 2>& q_thr, - const xt::xtensor<bool, 4>& t_msk, - const xt::xtensor<std::array<char, 32>, 2>& m_cdt, - const std::unordered_map<std::string, int>& bootstrap, - const std::vector<std::string>& dts - ) - { - // check that the metrics to be computed are valid - eh::utils::check_metrics( - metrics, - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"} - ); - - // check that optional parameters are given as arguments - eh::utils::evalp::check_optionals(metrics, q_thr); - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(3)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(3)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal lengths" - ); - // > leadtimes - if (t_msk.size() > 0) - if (q_prd.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "predictions and temporal masks feature different " - "numbers of lead times" - ); - // > sites - if (q_obs.shape(0) != q_prd.shape(0)) - throw std::runtime_error( - "observations and predictions feature different " - "numbers of sites" - ); - if (t_msk.size() > 0) - 
if (q_obs.shape(0) != t_msk.shape(0)) - throw std::runtime_error( - "observations and temporal masks feature different " - "numbers of sites" - ); - if (m_cdt.size() > 0) - if (q_obs.shape(0) != m_cdt.shape(0)) - throw std::runtime_error( - "observations and masking conditions feature different " - "numbers of sites" - ); - - // retrieve dimensions - std::size_t n_sit = q_prd.shape(0); - std::size_t n_ltm = q_prd.shape(1); - std::size_t n_mbr = q_prd.shape(2); - std::size_t n_tim = q_prd.shape(3); - std::size_t n_thr = q_thr.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(2) : - (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); - std::size_t n_exp = bootstrap.find("n_samples")->second == -9 ? 1: - bootstrap.find("n_samples")->second; - - // register metrics number of dimensions - std::unordered_map<std::string, std::vector<std::size_t>> dim; - - dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; - dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elements across metrics - elt["bs"] = {"o_k", "y_k"}; - elt["BSS"] = {"o_k", "bar_o"}; - elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; - elt["BS_LBD"] = {"o_k", "y_k"}; - elt["qs"] = {"q_qnt"}; - - // register nested metrics (i.e. 
metric dependent on another metric) - dep["BS"] = {"bs"}; - dep["BSS"] = {"bs"}; - dep["QS"] = {"qs"}; - dep["CRPS"] = {"qs", "crps"}; - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // generate masks from conditions if provided - auto gen_msk = [&]() { - xt::xtensor<bool, 4> c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); - if (m_cdt.size() > 0) - for (int s = 0; s < n_sit; s++) - for (int l = 0; l < n_ltm; l++) - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, s, l, m) = - eh::masks::generate_mask_from_conditions( - xt::view(m_cdt, s, m), - xt::view(q_obs, s), - xt::view(q_prd, s, l) - ); - return c_msk; - }; - const xt::xtensor<bool, 4> c_msk = gen_msk(); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> b_exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) && (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - b_exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - b_exp.push_back(xt::keep(all)); - } - - // initialise data structure for outputs - std::vector<xt::xarray<double>> r; - for (const auto& metric : metrics) - r.emplace_back(xt::zeros<double>(dim[metric])); - - auto summary = bootstrap.find("summary")->second; - - // compute variables one site at a time to minimise memory imprint - for (int s = 0; s < n_sit; s++) - // compute variables one lead time at a time to minimise memory imprint - for (int l = 0; l < n_ltm; l++) - { - // instantiate probabilist evaluator - const auto q_obs_v = xt::view(q_obs, s, xt::all()); - const 
auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all()); - const auto q_thr_v = xt::view(q_thr, s, xt::all()); - const auto t_msk_v = - t_msk.size() > 0 ? - xt::view(t_msk, s, l, xt::all(), xt::all()) : - (m_cdt.size() > 0 ? - xt::view(c_msk, s, l, xt::all(), xt::all()) : - xt::view(t_msk, s, l, xt::all(), xt::all())); - - eh::probabilist::Evaluator evaluator( - q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp - ); - - // pre-compute required elt - for (const auto& element : req_elt) - { - if ( element == "o_k" ) - evaluator.calc_o_k(); - else if ( element == "bar_o" ) - evaluator.calc_bar_o(); - else if ( element == "y_k" ) - evaluator.calc_y_k(); - else if ( element == "q_qnt" ) - evaluator.calc_q_qnt(); - } - - // pre-compute required dep - for (const auto& dependency : req_dep) - { - if ( dependency == "bs" ) - evaluator.calc_bs(); - else if ( dependency == "qs" ) - evaluator.calc_qs(); - else if ( dependency == "crps" ) - evaluator.calc_crps(); - } - - // retrieve or compute requested metrics - for (int m = 0; m < metrics.size(); m++) - { - const auto& metric = metrics[m]; - - if ( metric == "BS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS, summary); - } - else if ( metric == "BSS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BSS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BSS, summary); - } - else if ( metric == "BS_CRD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_CRD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - 
eh::uncertainty::summarise(evaluator.BS_CRD, summary); - } - else if ( metric == "BS_LBD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_LBD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS_LBD, summary); - } - else if ( metric == "QS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_QS(); - // (sites, lead times, subsets, samples, quantiles) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.QS, summary); - } - else if ( metric == "CRPS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_CRPS(); - // (sites, lead times, subsets, samples) - xt::view(r[m], s, l, xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.CRPS, summary); - } - } - } - - return r; - } -} diff --git a/src/probabilist/evaluator.hpp b/src/probabilist/evaluator.hpp deleted file mode 100644 index 311c47a4351eaa02acaaa1cbaf70fff645382379..0000000000000000000000000000000000000000 --- a/src/probabilist/evaluator.hpp +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef EVALHYD_PROBABILIST_EVALUATOR_H -#define EVALHYD_PROBABILIST_EVALUATOR_H - -#include <stdexcept> -#include <vector> - -#include <xtensor/xtensor.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xslice.hpp> - -using view1d_xtensor2d_double_type = decltype( - xt::view( - std::declval<const xt::xtensor<double, 2>&>(), - std::declval<int>(), - xt::all() - ) -); - -using view2d_xtensor4d_double_type = decltype( - xt::view( - std::declval<const xt::xtensor<double, 4>&>(), - std::declval<int>(), - std::declval<int>(), - xt::all(), - xt::all() - ) -); - -using view2d_xtensor4d_bool_type = decltype( - xt::view( - std::declval<const xt::xtensor<bool, 4>&>(), - std::declval<int>(), - std::declval<int>(), - xt::all(), - 
xt::all() - ) -); - -namespace evalhyd -{ - namespace probabilist - { - class Evaluator - { - private: - // members for input data - const view1d_xtensor2d_double_type& q_obs; - const view2d_xtensor4d_double_type& q_prd; - const view1d_xtensor2d_double_type& q_thr; - xt::xtensor<bool, 2> t_msk; - const std::vector<xt::xkeep_slice<int>>& b_exp; - - // members for dimensions - std::size_t n; - std::size_t n_msk; - std::size_t n_mbr; - std::size_t n_thr; - std::size_t n_exp; - - // members for computational elements - xt::xtensor<double, 2> o_k; - xt::xtensor<double, 3> bar_o; - xt::xtensor<double, 2> y_k; - xt::xtensor<double, 2> q_qnt; - - public: - // constructor method - Evaluator(const view1d_xtensor2d_double_type& obs, - const view2d_xtensor4d_double_type& prd, - const view1d_xtensor2d_double_type& thr, - const view2d_xtensor4d_bool_type& msk, - const std::vector<xt::xkeep_slice<int>>& exp) : - q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk), b_exp(exp) - { - // initialise a mask if none provided - // (corresponding to no temporal subset) - if (msk.size() < 1) - t_msk = xt::ones<bool>({std::size_t {1}, q_obs.size()}); - - // determine size for recurring dimensions - n = q_obs.size(); - n_msk = t_msk.shape(0); - n_mbr = q_prd.shape(0); - n_thr = q_thr.size(); - n_exp = b_exp.size(); - - // drop time steps where observations and/or predictions are NaN - auto obs_nan = xt::isnan(q_obs); - auto prd_nan = xt::isnan(q_prd); - auto sum_nan = xt::eval(xt::sum(prd_nan, -1)); - - if (xt::amin(sum_nan) != xt::amax(sum_nan)) - throw std::runtime_error( - "predictions across members feature non-equal lengths" - ); - - auto msk_nan = xt::where(obs_nan | xt::row(prd_nan, 0))[0]; - - xt::view(t_msk, xt::all(), xt::keep(msk_nan)) = false; - }; - - // members for intermediate evaluation metrics - // (i.e. 
before the reduction along the temporal axis) - xt::xtensor<double, 2> bs; - xt::xtensor<double, 2> qs; - xt::xtensor<double, 2> crps; - - // members for evaluation metrics - xt::xtensor<double, 3> BS; - xt::xtensor<double, 4> BS_CRD; - xt::xtensor<double, 4> BS_LBD; - xt::xtensor<double, 3> BSS; - xt::xtensor<double, 3> QS; - xt::xtensor<double, 2> CRPS; - - // methods to compute derived data - void calc_q_qnt(); - - // methods to compute elements - void calc_o_k(); - void calc_bar_o(); - void calc_y_k(); - - // methods to compute intermediate metrics - void calc_bs(); - void calc_qs(); - void calc_crps(); - - // methods to compute metrics - void calc_BS(); - void calc_BS_CRD(); - void calc_BS_LBD(); - void calc_BSS(); - void calc_QS(); - void calc_CRPS(); - }; - } -} - -#endif //EVALHYD_PROBABILIST_EVALUATOR_H diff --git a/src/probabilist/evaluator_brier.cpp b/src/probabilist/evaluator_brier.cpp deleted file mode 100644 index 2528aef8fb811add2845b19bc1bb5709081c9c10..0000000000000000000000000000000000000000 --- a/src/probabilist/evaluator_brier.cpp +++ /dev/null @@ -1,424 +0,0 @@ -#include <xtensor/xmath.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xmasked_view.hpp> -#include <xtensor/xoperation.hpp> - -#include "probabilist/evaluator.hpp" - -namespace eh = evalhyd; - -// NOTE ------------------------------------------------------------------------ -// All equations in metrics below are following notations from -// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences. -// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225". -// In particular, pp. 302-303, 332-333. -// ----------------------------------------------------------------------------- - -namespace evalhyd -{ - namespace probabilist - { - // Compute the Brier score for each time step. - // - // \require o_k: - // Observed event outcome. - // shape: (thresholds, time) - // \require y_k: - // Event probability forecast. 
- // shape: (thresholds, time) - // \assign bs: - // Brier score for each threshold for each time step. - // shape: (thresholds, time) - void Evaluator::calc_bs() - { - // return computed Brier score(s) - // $bs = (o_k - y_k)^2$ - bs = xt::square(o_k - y_k); - } - - // Compute the Brier score (BS). - // - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require bs: - // Brier score for each threshold for each time step. - // shape: (thresholds, time) - // \assign BS: - // Brier score for each subset and for each threshold. - // shape: (subsets, samples, thresholds) - void Evaluator::calc_BS() - { - // initialise output variable - // shape: (subsets, thresholds) - BS = xt::zeros<double>({n_msk, n_exp, n_thr}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto bs_masked_sampled = - xt::view(bs_masked, xt::all(), b_exp[e]); - - // calculate the mean over the time steps - // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$ - xt::view(BS, m, e, xt::all()) = - xt::nanmean(bs_masked_sampled, -1); - } - } - - // assign NaN where thresholds were not provided (i.e. set as NaN) - xt::masked_view( - BS, - xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(), xt::all())) - ) = NAN; - } - - // Compute the calibration-refinement decomposition of the Brier score - // into reliability, resolution, and uncertainty. - // - // BS = reliability - resolution + uncertainty - // - // \require q_thr: - // Streamflow exceedance threshold(s). - // shape: (thresholds,) - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require o_k: - // Observed event outcome. 
- // shape: (thresholds, time) - // \require y_k: - // Event probability forecast. - // shape: (thresholds, time) - // \require bar_o: - // Mean event observed outcome. - // shape: (subsets, thresholds) - // \assign BS_CRD: - // Brier score components (reliability, resolution, uncertainty) - // for each subset and for each threshold. - // shape: (subsets, samples, thresholds, components) - void Evaluator::calc_BS_CRD() - { - // declare internal variables - // shape: (bins, thresholds, time) - xt::xtensor<double, 3> msk_bins; - // shape: (bins, thresholds) - xt::xtensor<double, 2> N_i, bar_o_i; - // shape: (bins,) - xt::xtensor<double, 1> y_i; - - // compute range of forecast values $y_i$ - y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr; - - // initialise output variable - // shape: (subsets, thresholds, components) - BS_CRD = xt::zeros<double>({n_msk, n_exp, n_thr, std::size_t {3}}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); - auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto o_k_masked_sampled = - xt::view(o_k_masked, xt::all(), b_exp[e]); - auto y_k_masked_sampled = - xt::view(y_k_masked, xt::all(), b_exp[e]); - auto t_msk_sampled = - xt::view(xt::row(t_msk, m), b_exp[e]); - auto bar_o_sampled = - xt::view(bar_o, xt::all(), e, xt::all()); - - // calculate length of subset - auto l = xt::sum(t_msk_sampled); - - // compute mask to subsample time steps belonging to same forecast bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - // force evaluation to avoid segfault - xt::eval(y_k_masked_sampled), - xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of 
forecasts in each forecast bin $N_i$ - N_i = xt::nansum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ - bar_o_i = xt::where( - N_i > 0, - xt::nansum( - xt::where( - msk_bins, - xt::view(o_k_masked_sampled, xt::newaxis(), - xt::all(), xt::all()), - 0. - ), - -1 - ) / N_i, - 0. - ); - - // calculate reliability = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ - xt::view(BS_CRD, m, e, xt::all(), 0) = - xt::nansum( - xt::square( - xt::view(y_i, xt::all(), xt::newaxis()) - - bar_o_i - ) * N_i, - 0 - ) / l; - - // calculate resolution = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ - xt::view(BS_CRD, m, e, xt::all(), 1) = - xt::nansum( - xt::square( - bar_o_i - xt::view(bar_o_sampled, m) - ) * N_i, - 0 - ) / l; - - // calculate uncertainty = $\bar{o} (1 - \bar{o})$ - xt::view(BS_CRD, m, e, xt::all(), 2) = - xt::view(bar_o_sampled, m) - * (1 - xt::view(bar_o_sampled, m)); - } - } - - // assign NaN where thresholds were not provided (i.e. set as NaN) - xt::masked_view( - BS_CRD, - xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(), - xt::all(), xt::newaxis())) - ) = NAN; - } - - // Compute the likelihood-base rate decomposition of the Brier score - // into type 2 bias, discrimination, and sharpness (a.k.a. refinement). - // - // BS = type 2 bias - discrimination + sharpness - // - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require o_k: - // Observed event outcome. - // shape: (thresholds, time) - // \require y_k: - // Event probability forecast. - // shape: (thresholds, time) - // \return BS_LBD: - // Brier score components (type 2 bias, discrimination, sharpness) - // for each subset and for each threshold. 
- // shape: (subsets, samples, thresholds, components) - void Evaluator::calc_BS_LBD() - { - // declare internal variables - // shape: (bins, thresholds, time) - xt::xtensor<double, 3> msk_bins; - // shape: (thresholds,) - xt::xtensor<double, 1> bar_y; - // shape: (bins, thresholds) - xt::xtensor<double, 2> M_j, bar_y_j; - // shape: (bins,) - xt::xtensor<double, 1> o_j; - - // set the range of observed values $o_j$ - o_j = {0., 1.}; - - // declare and initialise output variable - // shape: (subsets, thresholds, components) - BS_LBD = xt::zeros<double>({n_msk, n_exp, n_thr, std::size_t {3}}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); - auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto o_k_masked_sampled = - xt::view(o_k_masked, xt::all(), b_exp[e]); - auto y_k_masked_sampled = - xt::view(y_k_masked, xt::all(), b_exp[e]); - auto t_msk_sampled = - xt::view(t_msk, xt::all(), b_exp[e]); - - // calculate length of subset - auto l = xt::sum(xt::row(t_msk_sampled, m)); - - // compute mask to subsample time steps belonging to same observation bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - // force evaluation to avoid segfault - xt::eval(o_k_masked_sampled), - xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of observations in each observation bin $M_j$ - M_j = xt::nansum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ - bar_y_j = xt::where( - M_j > 0, - xt::nansum( - xt::where( - msk_bins, - xt::view( - y_k_masked_sampled, - xt::newaxis(), xt::all(), xt::all() - ), - 0. - ), - -1 - ) / M_j, - 0. 
- ); - - // compute mean "climatology" forecast probability - // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ - bar_y = xt::nanmean(y_k_masked_sampled, -1); - - // calculate type 2 bias = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ - xt::view(BS_LBD, m, e, xt::all(), 0) = - xt::nansum( - xt::square( - xt::view(o_j, xt::all(), xt::newaxis()) - - bar_y_j - ) * M_j, - 0 - ) / l; - - // calculate discrimination = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ - xt::view(BS_LBD, m, e, xt::all(), 1) = - xt::nansum( - xt::square( - bar_y_j - bar_y - ) * M_j, - 0 - ) / l; - - // calculate sharpness = - // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ - xt::view(BS_LBD, m, e, xt::all(), 2) = - xt::nansum( - xt::square( - y_k_masked_sampled - - xt::view(bar_y, xt::all(), xt::newaxis()) - ), - -1 - ) / l; - } - - } - - // assign NaN where thresholds were not provided (i.e. set as NaN) - xt::masked_view( - BS_LBD, - xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(), - xt::all(), xt::newaxis())) - ) = NAN; - } - - // Compute the Brier skill score (BSS). - // - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require o_k: - // Observed event outcome. - // shape: (thresholds, time) - // \require bar_o: - // Mean event observed outcome. - // shape: (subsets, thresholds) - // \require bs: - // Brier scores for each time step. - // shape: (thresholds, time) - // \assign BSS: - // Brier skill score for each subset and for each threshold. 
- // shape: (subsets, samples, thresholds) - void Evaluator::calc_BSS() - { - // declare and initialise output variable - // shape: (subsets, thresholds) - BSS = xt::zeros<double>({n_msk, n_exp, n_thr}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); - auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto o_k_masked_sampled = - xt::view(o_k_masked, xt::all(), b_exp[e]); - auto bs_masked_sampled = - xt::view(bs_masked, xt::all(), b_exp[e]); - auto bar_o_sampled = - xt::view(bar_o, xt::all(), e, xt::all()); - - // calculate reference Brier score(s) - // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$ - xt::xtensor<double, 2> bs_ref = - xt::nanmean( - xt::square( - o_k_masked_sampled - - xt::view( - xt::view(bar_o_sampled, m), - xt::all(), xt::newaxis() - ) - ), - -1, xt::keep_dims - ); - - // compute Brier skill score(s) - // $BSS = \frac{1}{n} \sum_{k=1}^{n} 1 - \frac{bs}{bs_{ref}} - xt::view(BSS, m, e, xt::all()) = - xt::nanmean( - xt::where( - xt::equal(bs_ref, 0), - 0, 1 - (bs_masked_sampled / bs_ref) - ), - -1 - ); - } - } - - // assign NaN where thresholds were not provided (i.e. 
set as NaN) - xt::masked_view( - BSS, - xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(), - xt::all())) - ) = NAN; - } - } -} diff --git a/src/probabilist/evaluator_elements.cpp b/src/probabilist/evaluator_elements.cpp deleted file mode 100644 index bdce6c1b729a600343ac2bea710caea604a7658c..0000000000000000000000000000000000000000 --- a/src/probabilist/evaluator_elements.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include <xtensor/xmath.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xsort.hpp> - -#include "probabilist/evaluator.hpp" - -namespace evalhyd -{ - namespace probabilist - { - // Determine observed realisation of threshold(s) exceedance. - // - // \require q_obs: - // Streamflow observations. - // shape: (time,) - // \require q_thr: - // Streamflow exceedance threshold(s). - // shape: (thresholds,) - // \assign o_k: - // Event observed outcome. - // shape: (thresholds, time) - void Evaluator::calc_o_k() - { - // determine observed realisation of threshold(s) exceedance - o_k = q_obs >= xt::view(q_thr, xt::all(), xt::newaxis()); - } - - // Determine mean observed realisation of threshold(s) exceedance. - // - // \require o_k: - // Event observed outcome. - // shape: (thresholds, time) - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \assign bar_o: - // Mean event observed outcome. 
- // shape: (subsets, samples, thresholds) - void Evaluator::calc_bar_o() - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto o_k_masked = xt::where( - xt::view(t_msk, xt::all(), xt::newaxis(), xt::all()), - o_k, NAN - ); - - // compute variable one sample at a time - bar_o = xt::zeros<double>({n_msk, n_exp, n_thr}); - - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto o_k_masked_sampled = - xt::view(o_k_masked, xt::all(), xt::all(), b_exp[e]); - - // compute mean "climatology" relative frequency of the event - // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$ - xt::view(bar_o, xt::all(), e, xt::all()) = - xt::nanmean(o_k_masked_sampled, -1); - } - } - - // Determine forecast probability of threshold(s) exceedance to occur. - // - // \require q_prd: - // Streamflow predictions. - // shape: (members, time) - // \require q_thr: - // Streamflow exceedance threshold(s). - // shape: (thresholds,) - // \assign y_k: - // Event probability forecast. - // shape: (thresholds, time) - void Evaluator::calc_y_k() - { - // determine if members have exceeded threshold(s) - auto e_frc = - q_prd - >= xt::view(q_thr, xt::all(), xt::newaxis(), xt::newaxis()); - - // calculate how many members have exceeded threshold(s) - auto n_frc = xt::sum(e_frc, 1); - - // determine probability of threshold(s) exceedance - // /!\ probability calculation dividing by n (the number of - // members), not n+1 (the number of ranks) like in other metrics - y_k = xt::cast<double>(n_frc) / n_mbr; - } - - // Compute the forecast quantiles from the ensemble members. - // - // \require q_prd: - // Streamflow predictions. - // shape: (members, time) - // \assign q_qnt: - // Streamflow forecast quantiles. 
- // shape: (quantiles, time) - void Evaluator::calc_q_qnt() - { - q_qnt = xt::sort(q_prd, 0); - } - } -} diff --git a/src/probabilist/evaluator_quantiles.cpp b/src/probabilist/evaluator_quantiles.cpp deleted file mode 100644 index c6f37e23a716251c046fe35d3c11e7c5a6b46f73..0000000000000000000000000000000000000000 --- a/src/probabilist/evaluator_quantiles.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include <unordered_map> -#include <xtensor/xmath.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xoperation.hpp> - -#include "probabilist/evaluator.hpp" - -namespace eh = evalhyd; - -namespace evalhyd -{ - namespace probabilist - { - // Compute the quantile scores for each time step. - // - // \require q_obs: - // Streamflow observations. - // shape: (time,) - // \require q_qnt: - // Streamflow quantiles. - // shape: (quantiles, time) - // \assign qs: - // Quantile scores for each time step. - // shape: (quantiles, time) - void Evaluator::calc_qs() - { - // compute the quantile order $alpha$ - xt::xtensor<double, 1> alpha = - xt::arange<double>(1., double(n_mbr + 1)) - / double(n_mbr + 1); - - // calculate the difference - xt::xtensor<double, 2> diff = q_qnt - q_obs; - - // calculate the quantile scores - qs = xt::where( - diff > 0, - 2 * (1 - xt::view(alpha, xt::all(), xt::newaxis())) * diff, - - 2 * xt::view(alpha, xt::all(), xt::newaxis()) * diff - ); - } - - // Compute the quantile score (QS). - // - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require qs: - // Quantile scores for each time step. - // shape: (quantiles, time) - // \assign QS: - // Quantile scores. 
- // shape: (subsets, quantiles) - void Evaluator::calc_QS() - { - // initialise output variable - // shape: (subsets, quantiles) - QS = xt::zeros<double>({n_msk, n_exp, n_mbr}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto qs_masked = xt::where(xt::row(t_msk, m), qs, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto qs_masked_sampled = - xt::view(qs_masked, xt::all(), b_exp[e]); - - // calculate the mean over the time steps - // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$ - xt::view(QS, m, e, xt::all()) = - xt::nanmean(qs_masked_sampled, -1); - } - } - } - - // Compute the continuous rank probability score(s) based - // on quantile scores for each time step, and integrating using the - // trapezoidal rule. - // - // /!\ The number of quantiles must be sufficiently large so that the - // cumulative distribution is smooth enough for the numerical - // integration to be accurate. - // - // \require qs: - // Quantile scores for each time step. - // shape: (quantiles, time) - // \assign crps: - // CRPS for each time step. - // shape: (1, time) - void Evaluator::calc_crps() - { - // integrate with trapezoidal rule - crps = xt::view( - // xt::trapz(y, dx=1/(n+1), axis=0) - xt::trapz(qs, 1./(double(n_mbr) + 1.), 0), - xt::newaxis(), xt::all() - ); - } - - // Compute the continuous rank probability score (CRPS) based - // on quantile scores. - // - // \require t_msk: - // Temporal subsets of the whole record. - // shape: (subsets, time) - // \require crps: - // CRPS for each time step. - // shape: (1, time) - // \assign CRPS: - // CRPS. 
- // shape: (subsets,) - void Evaluator::calc_CRPS() - { - // initialise output variable - // shape: (subsets,) - CRPS = xt::zeros<double>({n_msk, n_exp}); - - // compute variable one mask at a time to minimise memory imprint - for (int m = 0; m < n_msk; m++) - { - // apply the mask - // (using NaN workaround until reducers work on masked_view) - auto crps_masked = xt::where(xt::row(t_msk, m), crps, NAN); - - // compute variable one sample at a time - for (int e = 0; e < n_exp; e++) - { - // apply the bootstrap sampling - auto crps_masked_sampled = - xt::view(crps_masked, xt::all(), b_exp[e]); - - // calculate the mean over the time steps - // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$ - xt::view(CRPS, m, e) = - xt::squeeze(xt::nanmean(crps_masked_sampled, -1)); - } - } - } - } -} diff --git a/src/uncertainty.hpp b/src/uncertainty.hpp deleted file mode 100644 index fd26b45e17c868d9ad49b0498862a283522a5aa1..0000000000000000000000000000000000000000 --- a/src/uncertainty.hpp +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef EVALHYD_UNCERTAINTY_HPP -#define EVALHYD_UNCERTAINTY_HPP - -#include <string> -#include <vector> -#include <array> -#include <ctime> -#include <chrono> -#include <iomanip> -#include <stdexcept> - -#include <xtensor/xtensor.hpp> -#include <xtensor/xadapt.hpp> -#include <xtensor/xrandom.hpp> -#include <xtensor/xio.hpp> - -#include "maths.hpp" - -typedef std::chrono::time_point< - std::chrono::system_clock, std::chrono::minutes -> tp_minutes; - -namespace evalhyd -{ - namespace uncertainty - { - inline auto bootstrap( - const std::vector<std::string>& datetimes, - int n_samples, int len_sample - ) - { - // convert string to time_point (via tm) - std::vector<std::tm> v_tm; - std::vector<tp_minutes> v_timepoints; - - for (auto const& str: datetimes) - { - // convert string to tm - std::tm tm = {}; - std::istringstream ss(str); - ss >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S"); - if (ss.fail()) { - throw std::runtime_error("datetime string parsing failed"); - } - 
tm.tm_year += 400; // add 400y to avoid dates prior 1970 - // while preserving leap year pattern - v_tm.push_back(tm); - - // convert tm to time_point - auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm)); - v_timepoints.push_back( - std::chrono::time_point_cast<std::chrono::minutes>(tp) - ); - } - - // adapt vector into xtensor - xt::xtensor<tp_minutes, 1> x_timepoints = xt::adapt(v_timepoints); - - // check constant time interval - auto ti = x_timepoints[1] - x_timepoints[0]; - for (int t = 1; t < x_timepoints.size() - 1; t++) - if (x_timepoints[t + 1] - x_timepoints[t] != ti) { - throw std::runtime_error( - "time interval not constant across datetimes" - ); - } - - // identify start and end years for period - int start_yr = v_tm.front().tm_year + 1900; - int end_yr = v_tm.back().tm_year + 1900; - - // assume start of year block as start of time series - std::tm start_hy = v_tm.front(); - - xt::xtensor<int, 1> year_blocks = xt::zeros<int>({v_tm.size()}); - for (int y = start_yr; y < end_yr; y++) { - // define window for year blocks - start_hy.tm_year = y - 1900; - auto start = std::chrono::system_clock::from_time_t( - std::mktime(&start_hy) - ); - start_hy.tm_year += 1; - auto end = std::chrono::system_clock::from_time_t( - std::mktime(&start_hy) - ); - - xt::xtensor<bool, 1> wdw = - (x_timepoints >= start) & (x_timepoints < end); - - // check that year is complete (without a rigorous leap year check) - int n_days = xt::sum(wdw)(); - if ((n_days != 365) && (n_days != 366)) { - throw std::runtime_error( - "year starting in " + std::to_string(y) - + " is incomplete" - ); - } - - // determine corresponding year block for each time step - year_blocks = xt::where(wdw, y, year_blocks); - } - - // check that time series ends on the last day of a year block - if (year_blocks(year_blocks.size() - 1) == 0) { - throw std::runtime_error( - "final day of final year not equal to first day of " - "first year minus one time step" - ); - } - - // generate 
bootstrapping experiment - xt::xtensor<int, 2> experiment = xt::random::randint( - {n_samples, len_sample}, start_yr, end_yr - ); - - std::vector<xt::xkeep_slice<int>> samples; - - // compute metrics for each sample - for (int s = 0; s < n_samples; s++) { - // select bootstrapped years - auto exp = xt::view(experiment, s); - - auto i0 = xt::flatten_indices( - xt::argwhere(xt::equal(year_blocks, exp(0))) - ); - auto i1 = xt::flatten_indices( - xt::argwhere(xt::equal(year_blocks, exp(1))) - ); - xt::xtensor<int, 1> idx = xt::concatenate(xt::xtuple(i0, i1), 0); - - for (int p = 2; p < exp.size(); p++) { - auto i = xt::flatten_indices( - xt::argwhere(xt::equal(year_blocks, exp(p))) - ); - idx = xt::concatenate(xt::xtuple(idx, i), 0); - } - - samples.push_back(xt::keep(idx)); - } - - return samples; - } - - inline auto summarise(const xt::xarray<double>& values, int summary) - { - // TODO: wait for xt::quantile to be available - // or implement it internally for n-dim expressions - // summary 2: series of quantiles across samples - if (summary == 2) { -// // adjust last axis size (from samples to number of statistics) -// auto s = values.shape(); -// s.pop_back(); -// s.push_back(7); -// xt::xarray<double> v = xt::zeros<double>(s); -// // quantiles -// xt::view(v, xt::all()) = -// xt::quantile(values, {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}, -1); -// return v; - return values; - } - // TODO: wait for xt::stddev to be fixed for rtensor - // or implement it internally for n-dim expressions - // summary 1: mean and standard deviation across samples - else if (summary == 1) { -// // adjust last axis size (from samples to number of statistics) -// auto s = values.shape(); -// s.pop_back(); -// s.push_back(2); -// xt::xarray<double> v = xt::zeros<double>(s); -// // mean -// xt::strided_view(s, xt::ellipsis(), 0) = xt::mean(values, -1); -// // standard deviation -// xt::strided_view(s, xt::ellipsis(), 1) = xt::stddev(values, -1); -// return v; - return values; - } - // 
summary 0: raw (keep all samples) - else { - return values; - } - } - - } -} - -#endif //EVALHYD_UNCERTAINTY_HPP diff --git a/src/utils.hpp b/src/utils.hpp deleted file mode 100644 index 7cc02a56f2a43732d098064eddc825042bdae24e..0000000000000000000000000000000000000000 --- a/src/utils.hpp +++ /dev/null @@ -1,165 +0,0 @@ -#ifndef EVALHYD_UTILS_HPP -#define EVALHYD_UTILS_HPP - -#include <unordered_map> -#include <unordered_set> -#include <vector> -#include <stdexcept> -#include <xtensor/xtensor.hpp> -#include <xtensor/xrandom.hpp> - -namespace evalhyd -{ - namespace utils - { - /// Procedure to determine based on a list of metrics which elements - /// and which metrics (and their associated elements) require to be - /// pre-computed for memoisation purposes. - /// - /// \param [in] metrics: - /// Vector of strings for the metric(s) to be computed. - /// \param [in] elements: - /// Map between metrics and their required computation elements. - /// \param [in] dependencies: - /// Map between metrics and their dependencies. - /// \param [out] required_elements: - /// Set of elements identified as required to be pre-computed. - /// \param [out] required_dependencies: - /// Set of metrics identified as required to be pre-computed. 
- inline void find_requirements ( - const std::vector<std::string>& metrics, - std::unordered_map<std::string, std::vector<std::string>>& elements, - std::unordered_map<std::string, std::vector<std::string>>& dependencies, - std::vector<std::string>& required_elements, - std::vector<std::string>& required_dependencies - ) - { - std::unordered_set<std::string> found_elements; - std::unordered_set<std::string> found_dependencies; - - for (const auto& metric : metrics) - { - // add elements to pre-computation set - for (const auto& element : elements[metric]) - if (found_elements.find(element) == found_elements.end()) - { - found_elements.insert(element); - required_elements.push_back(element); - } - - // add metric dependencies to pre-computation set - if (dependencies.find(metric) != dependencies.end()) - { - for (const auto& dependency : dependencies[metric]) - { - if (found_dependencies.find(dependency) == found_dependencies.end()) - { - found_dependencies.insert(dependency); - required_dependencies.push_back(dependency); - } - // add dependency elements to pre-computation set - for (const auto& element : elements[dependency]) - if (found_elements.find(element) == found_elements.end()) - { - found_elements.insert(element); - required_elements.push_back(element); - } - } - } - } - } - - /// Procedure to check that all elements in the list of metrics are - /// valid metrics. - /// - /// \param [in] requested_metrics: - /// Vector of strings for the metric(s) to be computed. - /// \param [in] valid_metrics: - /// Vector of strings for the metric(s) to can be computed. 
- inline void check_metrics ( - const std::vector<std::string>& requested_metrics, - const std::vector<std::string>& valid_metrics - ) - { - for (const auto& metric : requested_metrics) - { - if (std::find(valid_metrics.begin(), valid_metrics.end(), metric) - == valid_metrics.end()) - { - throw std::runtime_error( - "invalid evaluation metric: " + metric - ); - } - } - } - - /// Procedure to check that all elements for a bootstrap experiment - /// are provided and valid. - /// - /// \param [in] bootstrap: - /// Map of parameters for the bootstrap experiment. - inline void check_bootstrap ( - const std::unordered_map<std::string, int>& bootstrap - ) - { - // check n_samples - if (bootstrap.find("n_samples") == bootstrap.end()) - throw std::runtime_error( - "number of samples missing for bootstrap" - ); - // check len_sample - if (bootstrap.find("len_sample") == bootstrap.end()) - throw std::runtime_error( - "length of sample missing for bootstrap" - ); - // check summary - if (bootstrap.find("summary") == bootstrap.end()) - throw std::runtime_error( - "summary missing for bootstrap" - ); - auto s = bootstrap.find("summary")->second; - // TODO: change upper bound when mean+stddev and quantiles implemented - if ((s < 0) || (s > 0)) - throw std::runtime_error( - "invalid value for bootstrap summary" - ); - // set seed - if (bootstrap.find("seed") == bootstrap.end()) - xt::random::seed(time(nullptr)); - else - xt::random::seed(bootstrap.find("seed")->second); - } - - namespace evalp - { - /// Procedure to check that optional parameters are provided - /// as arguments when required metrics need them. - /// - /// \param [in] metrics: - /// Vector of strings for the metric(s) to be computed. - /// \param [in] thresholds: - /// Array of thresholds for metrics based on exceedance events. 
- inline void check_optionals ( - const std::vector<std::string>& metrics, - const xt::xtensor<double, 1>& thresholds - ) - { - std::vector<std::string>threshold_metrics = - {"BS", "BS_CRD", "BS_LBD", "BSS"}; - - for (const auto& metric : metrics) - { - if (std::find(threshold_metrics.begin(), threshold_metrics.end(), - metric) != threshold_metrics.end()) - if (thresholds.size() < 1) - throw std::runtime_error( - "missing thresholds *q_thr* required to " - "compute " + metric - ); - } - } - } - } -} - -#endif //EVALHYD_UTILS_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eb0ffa74d796b7820d5e722453d3549175276e2c..a67f819f67c86753ddd38bdf5863651233d496e6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,7 @@ +# Copyright (c) 2023, INRAE. +# Distributed under the terms of the GPL-3 Licence. +# The full licence is in the file LICENCE, distributed with this software. + cmake_minimum_required(VERSION 3.15) # ------------------------------------------------------------------------------ @@ -15,23 +19,37 @@ add_executable( test_uncertainty.cpp ) +OPTION(EVALHYD_TESTING_OS "OS system used to run tests") + +if(CMAKE_HOST_APPLE) + target_compile_definitions(evalhyd_tests PRIVATE EVALHYD_TESTING_OS MACOS) + message(STATUS "Found supported OS to run tests: APPLE") +elseif(CMAKE_HOST_WIN32) + target_compile_definitions(evalhyd_tests PRIVATE EVALHYD_TESTING_OS WINDOWS) + message(STATUS "Found supported OS to run tests: WIN32") +elseif(CMAKE_HOST_UNIX) + target_compile_definitions(evalhyd_tests PRIVATE EVALHYD_TESTING_OS LINUX) + message(STATUS "Found supported OS to run tests: UNIX") +else() + message(SEND_ERROR "OS not supported to run tests") +endif() + set_target_properties( evalhyd_tests PROPERTIES VISIBILITY_INLINES_HIDDEN ON - CXX_VISIBILITY_PRESET hidden ) target_include_directories( evalhyd_tests PRIVATE - ${CMAKE_SOURCE_DIR}/src + ${CMAKE_SOURCE_DIR}/include/evalhyd ) target_compile_definitions( evalhyd_tests PRIVATE - 
EVALHYD_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/data" + EVALHYD_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}" ) target_link_libraries( diff --git a/tests/expected/evald/CONT_TBL.csv b/tests/expected/evald/CONT_TBL.csv new file mode 100644 index 0000000000000000000000000000000000000000..d9490d6fb9566bd6c901b8b855cf50e272d40e8c --- /dev/null +++ b/tests/expected/evald/CONT_TBL.csv @@ -0,0 +1,204 @@ +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. 
+NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,16.,8.,87. +221.,21.,5.,64. 
+NAN,NAN,NAN,NAN diff --git a/tests/expected/evald/KGE.csv b/tests/expected/evald/KGE.csv new file mode 100644 index 0000000000000000000000000000000000000000..65b1265ba054dbeee0465666a960c4b6c07944c3 --- /dev/null +++ b/tests/expected/evald/KGE.csv @@ -0,0 +1,51 @@ +0.7480876678384525 +0.74610619665192 +0.7441110304778197 +0.7430108522656984 +0.7417677706194681 +0.740519915124128 +0.7396393314765528 +0.7391812106418076 +0.7385521156240031 +0.7374975605864584 +0.736478762920044 +0.7356032352557134 +0.7349262719558889 +0.7341531483736209 +0.7335193136927298 +0.732498016247827 +0.7316031283668971 +0.7311620062353068 +0.7304853804554484 +0.7298301318606002 +0.7291682672297097 +0.7284933080332816 +0.7278420198262487 +0.7273338548948837 +0.7266696338186898 +0.7261028872180326 +0.7255515136947399 +0.7249203100577184 +0.724129099815763 +0.7235915471922136 +0.723195030128365 +0.7223157825504646 +0.7214401411915639 +0.7203988937539173 +0.7197737983854688 +0.7188157660001235 +0.7176518268945717 +0.716230562324343 +0.7149933138365094 +0.7133901818967825 +0.7126230134351779 +0.711672543996632 +0.7101399598194 +0.7086263896776204 +0.7068405183946846 +0.7050500737470602 +0.7031816136500466 +0.7006732963875493 +0.6961818766730593 +0.6916216736996625 +0.6764337637969222 diff --git a/tests/expected/evald/KGEPRIME.csv b/tests/expected/evald/KGEPRIME.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3ab57461cefacf8d86dc335cc9a7bb86c323507 --- /dev/null +++ b/tests/expected/evald/KGEPRIME.csv @@ -0,0 +1,51 @@ +0.8131407494929581 +0.8127748549543973 +0.8120324184047302 +0.8117867087643632 +0.8113865804825375 +0.8110546552600805 +0.8110115513420257 +0.8109282579595157 +0.8107808794738488 +0.8102714601853906 +0.8098723493335118 +0.80972964340455 +0.8096359354316549 +0.8093267143355573 +0.8091370309262083 +0.80876520882257 +0.808297843099951 +0.8082504172955576 +0.8079588362318786 +0.8077792859196675 +0.8075824101888797 +0.8072872435440667 +0.8071551289052044 
+0.8070989073590911 +0.8068619291248411 +0.8067928045232984 +0.8066998263051663 +0.806455314511177 +0.806108392682091 +0.8060009627097642 +0.8059219799600571 +0.8056200993568368 +0.805444123201897 +0.805182363569416 +0.8049830239354914 +0.8048140123227603 +0.8043601618202886 +0.8037118677795622 +0.8035729804192682 +0.8027506521878371 +0.8024589351470541 +0.8021804568386013 +0.8017099458589753 +0.8013131114124993 +0.8004747893474917 +0.7998057696216888 +0.7995518852898957 +0.7986597353849383 +0.7971020515928053 +0.7958136756510419 +0.7899796163833354 diff --git a/tests/expected/evald/KGEPRIME_D.csv b/tests/expected/evald/KGEPRIME_D.csv new file mode 100644 index 0000000000000000000000000000000000000000..558a3361645bc094d86d5c9b20fc189c013d9873 --- /dev/null +++ b/tests/expected/evald/KGEPRIME_D.csv @@ -0,0 +1,51 @@ +0.9071248643948864,1.1477333530243923,1.0668239858924582 +0.9077537938998346,1.1478429758547268,1.0684568980300118 +0.9080496949392161,1.1484028026509738,1.0696675053044051 +0.9082556740306482,1.1484209022637022,1.0705581846521615 +0.9084737485198940,1.1486630576436567,1.0713951161082431 +0.9089321810875520,1.1489428726605424,1.0722704389155211 +0.9093310954815347,1.1488135780819770,1.0731457617227991 +0.9094546896430078,1.1488016399441205,1.0735373535050023 +0.9095397983553178,1.1487856134230323,1.0740517976110342 +0.9095735837396087,1.1491999363901180,1.0745611228702399 +0.9096311356178044,1.1494305824532358,1.0751830627596215 +0.9101044432964827,1.1495392868849328,1.0758920230450484 +0.9102441911392056,1.1494163423692945,1.0765318788983218 +0.9102574967893201,1.1495752574168396,1.0770053722297441 +0.9103829299622012,1.1496597741688170,1.0774558307504485 +0.9104389606151662,1.1498193287039202,1.0781263996846790 +0.9103708026184928,1.1501613638087465,1.0785359074307741 +0.9106545061768646,1.1501897035858120,1.0789198209427380 +0.9106680754546872,1.1503679937353128,1.0793037344547021 +0.9107871131080774,1.1504053916036157,1.0798002625968421 
+0.9109331668775991,1.1504930666773272,1.0802711965048515 +0.9109503931900584,1.1506796793881644,1.0806474317465762 +0.9112916063517488,1.1507819870125124,1.0811465193121295 +0.9116384611328783,1.1508213892258903,1.0815841807157685 +0.9116240546641530,1.1508649226798111,1.0820474363535382 +0.9117728671872545,1.1507388089259101,1.0826002718107666 +0.9119101939393581,1.1506625843082945,1.0831019187997328 +0.9119150575215442,1.1507542944710545,1.0835088671224147 +0.9119108416265477,1.1509472994224532,1.0839593256431190 +0.9120349545371204,1.1508964568748929,1.0844277001277152 +0.9120974194539910,1.1508349399490534,1.0847834599821353 +0.9121099264543083,1.1508757330670814,1.0854130781417561 +0.9125099729643541,1.1509349546260605,1.0861169195803568 +0.9129864907072712,1.1511152707612748,1.0868719494872194 +0.9130727481787702,1.1511623991072226,1.0873224080079240 +0.9137178424599028,1.1513164779920453,1.0880697596445472 +0.9137419646569960,1.1514764306974452,1.0888222301279968 +0.9136897426066677,1.1518176713888661,1.0896156513860558 +0.9144254206019433,1.1517387012476892,1.0907520353814693 +0.9142984381688551,1.1522701343488451,1.0915224218288104 +0.9142709614317646,1.1523150149572656,1.0920496630519076 +0.9144137451984202,1.1523110423287639,1.0927842175714655 +0.9149046185657690,1.1525453982271228,1.0938489377113123 +0.9155452591872579,1.1527254670882050,1.0949674057428342 +0.9155186197874120,1.1532046716768467,1.0959246300993313 +0.9154368750377250,1.1532273515892346,1.0972017823824649 +0.9167750313811954,1.1532539657633949,1.0988244568263661 +0.9169078074034277,1.1533196303650488,1.1006314097560101 +0.9175076075676912,1.1535549186612073,1.1038434861394426 +0.9179463597643975,1.1527797412590994,1.1077849981956065 +0.9179618893753376,1.1523899554203556,1.1189773567810644 diff --git a/tests/expected/evald/KGE_D.csv b/tests/expected/evald/KGE_D.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d65cb59f4074a5036b38bfead4a934248bdf974 --- /dev/null 
+++ b/tests/expected/evald/KGE_D.csv @@ -0,0 +1,51 @@ +0.9071248643948864,1.2244294704151979,1.0668239858924582 +0.9077537938998346,1.2264207454072791,1.0684568980300118 +0.9080496949392161,1.2284091609962542,1.0696675053044051 +0.9082556740306482,1.2294513963440263,1.0705581846521615 +0.9084737485198940,1.2306719900133749,1.0713951161082431 +0.9089321810875520,1.2319774783565793,1.0722704389155211 +0.9093310954815347,1.2328444223282775,1.0731457617227991 +0.9094546896430078,1.2332814722478178,1.0735373535050023 +0.9095397983553178,1.2338552531667024,1.0740517976110342 +0.9095735837396087,1.2348855740497733,1.0745611228702399 +0.9096311356178044,1.2358482940716458,1.0751830627596215 +0.9101044432964827,1.2367801489363925,1.0758920230450484 +0.9102441911392056,1.2373833346872534,1.0765318788983218 +0.9102574967893201,1.2380987280203271,1.0770053722297441 +0.9103829299622012,1.2387076270574358,1.0774558307504485 +0.9104389606151662,1.2396505731434120,1.0781263996846790 +0.9103708026184928,1.2404903302072829,1.0785359074307741 +0.9106545061768646,1.2409624690429850,1.0789198209427380 +0.9106680754546872,1.2415964716356864,1.0793037344547021 +0.9107871131080774,1.2422080439464072,1.0798002625968421 +0.9109331668775991,1.2428445217100519,1.0802711965048515 +0.9109503931900584,1.2434790402937934,1.0806474317465762 +0.9112916063517488,1.2441639397456739,1.0811465193121295 +0.9116384611328783,1.2447102094160669,1.0815841807157685 +0.9116240546641530,1.2452904391749027,1.0820474363535382 +0.9117728671872545,1.2457901473263877,1.0826002718107666 +0.9119101939393581,1.2462848529553729,1.0831019187997328 +0.9119150575215442,1.2468524819385858,1.0835088671224147 +0.9119108416265477,1.2475800585327315,1.0839593256431190 +0.9120349545371204,1.2480639978139763,1.0844277001277152 +0.9120974194539910,1.2484067080262666,1.0847834599821353 +0.9121099264543083,1.2491755719869908,1.0854130781417561 +0.9125099729643541,1.2500499275558148,1.0861169195803568 
+0.9129864907072712,1.2511148984168152,1.0868719494872194 +0.9130727481787702,1.2516846718054440,1.0873224080079240 +0.9137178424599028,1.2527126434836113,1.0880697596445472 +0.9137419646569960,1.2537531352118179,1.0888222301279968 +0.9136897426066677,1.2550385622883493,1.0896156513860558 +0.9144254206019433,1.2562613326135268,1.0907520353814693 +0.9142984381688551,1.2577286876454601,1.0915224218288104 +0.9142709614317646,1.2583852238137356,1.0920496630519076 +0.9144137451984202,1.2592273207901978,1.0927842175714655 +0.9149046185657690,1.2607105595147998,1.0938489377113123 +0.9155452591872579,1.2621968142312687,1.0949674057428342 +0.9155186197874120,1.2638254032362688,1.0959246300993313 +0.9154368750377250,1.2653231056559175,1.0972017823824649 +0.9167750313811954,1.2672236625128150,1.0988244568263661 +0.9169078074034277,1.2693798106679639,1.1006314097560101 +0.9175076075676912,1.2733440828682880,1.1038434861394426 +0.9179463597643975,1.2770321035906431,1.1077849981956065 +0.9179618893753376,1.2894982662973180,1.1189773567810644 diff --git a/tests/expected/evald/MAE.csv b/tests/expected/evald/MAE.csv new file mode 100644 index 0000000000000000000000000000000000000000..ef9cf0e5aa19cebfd335aaaafd5f73f21178bff5 --- /dev/null +++ b/tests/expected/evald/MAE.csv @@ -0,0 +1,51 @@ +265.1929260450160655 +265.6816720257234579 +265.7041800643086731 +265.7041800643086731 +265.8360128617363216 +266.1318327974276485 +266.4019292604501743 +266.3729903536977304 +266.5048231511253789 +266.6816720257234579 +266.6977491961415012 +266.8360128617363216 +267.0482315112540164 +267.3215434083601281 +267.4758842443729918 +267.8617363344051228 +268.0160771704179865 +267.9389067524115831 +268.1511254019292778 +268.1382636655948772 +268.3311897106109427 +268.5144694533761935 +268.4726688102894059 +268.3344051446945286 +268.5369774919614088 +268.6527331189710708 +268.7556270096462754 +268.9260450160771825 +269.0739549839228175 +269.2443729903537246 +269.4147909967845749 +269.7491961414791035 
+269.7909967845658912 +269.9099678456591391 +270.0643086816720029 +269.9421221864951690 +270.1864951768488936 +270.6623794212218854 +271.1061093247588474 +271.5852090032154251 +271.9067524115755532 +272.1286173633440626 +272.3279742765273568 +272.6784565916398719 +273.4501607717041907 +274.5530546623793953 +274.8617363344051228 +276.1286173633440626 +278.5176848874597795 +281.2700964630225258 +291.2990353697749129 diff --git a/tests/expected/evald/MARE.csv b/tests/expected/evald/MARE.csv new file mode 100644 index 0000000000000000000000000000000000000000..bb93762afc8ff3ed5f1bad0080f8eaf37ae81c78 --- /dev/null +++ b/tests/expected/evald/MARE.csv @@ -0,0 +1,51 @@ +0.2110884459948862 +0.2114774783536764 +0.2114953943175681 +0.2114953943175681 +0.2116003306775049 +0.2118357976315096 +0.2120507891982094 +0.2120277543874916 +0.2121326907474284 +0.2122734590351485 +0.2122862561522140 +0.2123963113589770 +0.2125652333042412 +0.2127827842943542 +0.2129056366181827 +0.2132127674277538 +0.2133356197515823 +0.2132741935896681 +0.2134431155349323 +0.2134328778412799 +0.2135864432460655 +0.2137323303806118 +0.2136990578762416 +0.2135890026694786 +0.2137502463445035 +0.2138423855873749 +0.2139242871365938 +0.2140599365774878 +0.2141776700544901 +0.2143133194953841 +0.2144489689362780 +0.2147151489712397 +0.2147484214756099 +0.2148431201418944 +0.2149659724657229 +0.2148687143760253 +0.2150632305554205 +0.2154420252205583 +0.2157952256515652 +0.2161765797401161 +0.2164325220814255 +0.2166091222969289 +0.2167678065485407 +0.2170467837005679 +0.2176610453197104 +0.2185389275504014 +0.2187846321980584 +0.2197930450228172 +0.2216946966187457 +0.2238855630603538 +0.2318684046857924 diff --git a/tests/expected/evald/MSE.csv b/tests/expected/evald/MSE.csv new file mode 100644 index 0000000000000000000000000000000000000000..53a1eb3781e0e67ab67632c05a929f6d1aff72f9 --- /dev/null +++ b/tests/expected/evald/MSE.csv @@ -0,0 +1,51 @@ +603782.2604501608293504 +603540.1704180064843968 
+604973.1768488745437935 +605519.1061093247262761 +606241.1157556270482019 +605823.9710610932670534 +605116.8520900321891531 +605160.5144694533664733 +605628.1511254019569606 +607006.1800643086899072 +608195.0578778134658933 +607157.1061093247262761 +607415.4598070739302784 +608465.9453376205638051 +608766.6463022507959977 +609964.8456591640133411 +611618.5176848875125870 +610871.5080385851906613 +611795.5273311897180974 +612155.2250803858041763 +612401.4630225080763921 +613310.6237942122388631 +612593.7202572347596288 +611633.2090032154228538 +612660.8906752411276102 +612724.9549839228857309 +612831.2958199357381091 +613728.3408360128523782 +614918.0514469452900812 +615075.4372990353731439 +615330.4244372990215197 +616544.5594855305971578 +615854.5048231511609629 +615046.1800643086899072 +615534.5530546624213457 +613767.3826366559369490 +615365.1704180064843968 +617751.3633440514095128 +615900.7909967845771462 +618968.0353697749087587 +620238.9099678456550464 +620927.2025723472470418 +620784.1286173633998260 +619856.3247588424710557 +622720.4019292604643852 +625799.9421221865341067 +621881.5369774919236079 +624982.4630225080763921 +628774.5691318328026682 +633351.0771704179933295 +656835.5305466237477958 diff --git a/tests/expected/evald/NSE.csv b/tests/expected/evald/NSE.csv new file mode 100644 index 0000000000000000000000000000000000000000..578721bc7dab93dc5517cb15d48d9d81844d6e7d --- /dev/null +++ b/tests/expected/evald/NSE.csv @@ -0,0 +1,51 @@ +0.7189121923160171 +0.7190248961181289 +0.7183577671505612 +0.7181036125173065 +0.7177674845422075 +0.7179616841657375 +0.7182908798615486 +0.7182705530594651 +0.718052847156156 +0.7174113126846504 +0.7168578365723296 +0.7173410498202125 +0.7172207745500294 +0.7167317262719881 +0.7165917364437786 +0.7160339207336052 +0.7152640620034193 +0.7156118286033213 +0.7151816560490262 +0.7150142005632107 +0.7148995657223857 +0.714476310478086 +0.7148100613295596 +0.715257222533744 +0.7147787904778381 +0.7147489656597577 
+0.7146994592160939 +0.7142818444011523 +0.7137279805841595 +0.7136547103888884 +0.7135360024036145 +0.7129707679121519 +0.7132920194045123 +0.7136683309479988 +0.7134409713480524 +0.7142636686863795 +0.7135198265862999 +0.7124089464193454 +0.7132704711081728 +0.7118425308507162 +0.7112508815459708 +0.7109304503708312 +0.710997057734077 +0.7114289921740817 +0.710095635390346 +0.7086619708754944 +0.7104861647677075 +0.7090425441765131 +0.7072771481677261 +0.705146577768962 +0.6942135081069736 diff --git a/tests/expected/evald/RMSE.csv b/tests/expected/evald/RMSE.csv new file mode 100644 index 0000000000000000000000000000000000000000..42b7512dae33e1eea96a9afcc51236c16e3ba7ce --- /dev/null +++ b/tests/expected/evald/RMSE.csv @@ -0,0 +1,51 @@ +777.0342723780984 +776.8784785396018 +777.8002165394881 +778.1510818018085 +778.6148699810626 +778.3469477431598 +777.8925710469488 +777.9206350711191 +778.2211453856814 +779.1060133668002 +779.8686157794872 +779.2028658246354 +779.3686289600538 +780.0422715068848 +780.2349942820117 +781.0024620058275 +782.0604309673821 +781.5826943059737 +782.1735915582868 +782.4034925026765 +782.5608366271009 +783.1415094312216 +782.6836655106805 +782.0698236111757 +782.7265746576138 +782.7674973987633 +782.8354206472366 +783.4081572437275 +784.1671068381696 +784.267452658234 +784.4299997050719 +785.2035146925481 +784.7639803298513 +784.2487998488162 +784.5601016204319 +783.4330747655832 +784.4521466718072 +785.971604667784 +784.7934702816943 +786.7452162992635 +787.5524807705488 +787.9893416616416 +787.8985522371287 +787.3095482456963 +789.1263535893733 +791.0751810809049 +788.5946594908514 +790.5583236058602 +792.9530686817681 +795.833573789406 +810.4539040233095 diff --git a/tests/expected/evalp/AS.csv b/tests/expected/evalp/AS.csv new file mode 100644 index 0000000000000000000000000000000000000000..2062dbb440cef55fc54fcc1c82abe591701a9b02 --- /dev/null +++ b/tests/expected/evalp/AS.csv @@ -0,0 +1 @@ +0.4914810317862 diff --git 
a/tests/expected/evalp/AW.csv b/tests/expected/evalp/AW.csv new file mode 100644 index 0000000000000000000000000000000000000000..40622cd35ff86f9f32c117c5d13b4572137f2d13 --- /dev/null +++ b/tests/expected/evalp/AW.csv @@ -0,0 +1 @@ +9.2749196141479,31.3215434083601 diff --git a/tests/expected/evalp/AWI.csv b/tests/expected/evalp/AWI.csv new file mode 100644 index 0000000000000000000000000000000000000000..8b3b7e7f9598b23911dc06f440d71572b99b48e5 --- /dev/null +++ b/tests/expected/evalp/AWI.csv @@ -0,0 +1 @@ +0.9821120161733,0.9880951944476 diff --git a/tests/expected/evalp/AWN.csv b/tests/expected/evalp/AWN.csv new file mode 100644 index 0000000000000000000000000000000000000000..34e29a4d703bbbd371011a5dca898f891df2ab20 --- /dev/null +++ b/tests/expected/evalp/AWN.csv @@ -0,0 +1 @@ +0.0073826568351,0.0249313434669 diff --git a/tests/expected/evalp/BS.csv b/tests/expected/evalp/BS.csv new file mode 100644 index 0000000000000000000000000000000000000000..3db5ba184a4650f383600095ea081a04acf97c74 --- /dev/null +++ b/tests/expected/evalp/BS.csv @@ -0,0 +1,4 @@ +0.1061513565769 +0.0739562201528 +0.0866918610329 +nan diff --git a/tests/expected/evalp/BSS.csv b/tests/expected/evalp/BSS.csv new file mode 100644 index 0000000000000000000000000000000000000000..6b26f31c41c6febf5321dbd600f2c38654b819a6 --- /dev/null +++ b/tests/expected/evalp/BSS.csv @@ -0,0 +1,4 @@ +0.5705594211361 +0.6661165249535 +0.5635125720476 +nan diff --git a/tests/expected/evalp/BS_CRD.csv b/tests/expected/evalp/BS_CRD.csv new file mode 100644 index 0000000000000000000000000000000000000000..9fa242b1a1bf2d7e205232c2eb70a407319e2295 --- /dev/null +++ b/tests/expected/evalp/BS_CRD.csv @@ -0,0 +1,4 @@ +0.0114117580190,0.1524456042419,0.2471852027998 +0.0055324125593,0.1530792786029,0.2215030861964 +0.0101394313199,0.1220600742934,0.1986125040064 +nan,nan,nan diff --git a/tests/expected/evalp/BS_LBD.csv b/tests/expected/evalp/BS_LBD.csv new file mode 100644 index 
0000000000000000000000000000000000000000..903f1023e8152bb24a3439e5c5695868c6d65bd4 --- /dev/null +++ b/tests/expected/evalp/BS_LBD.csv @@ -0,0 +1,4 @@ +0.0121598807967,0.1506234181408,0.2446148939211 +0.0080317462446,0.1473868836293,0.2133113575375 +0.0171912794414,0.1048221425794,0.1743227241709 +nan,nan,nan diff --git a/tests/expected/evalp/CR.csv b/tests/expected/evalp/CR.csv new file mode 100644 index 0000000000000000000000000000000000000000..a4a746929f3e5aef516ba7ca4e79cb9982e0f69f --- /dev/null +++ b/tests/expected/evalp/CR.csv @@ -0,0 +1 @@ +0.0064308681672,0.0353697749196 diff --git a/tests/expected/evalp/CRPS_FROM_BS.csv b/tests/expected/evalp/CRPS_FROM_BS.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a155403cfd93a41adafa598edbc99227973ad0a --- /dev/null +++ b/tests/expected/evalp/CRPS_FROM_BS.csv @@ -0,0 +1 @@ +226.5713674310274 diff --git a/tests/expected/evalp/CRPS_FROM_ECDF.csv b/tests/expected/evalp/CRPS_FROM_ECDF.csv new file mode 100644 index 0000000000000000000000000000000000000000..a639c991d747ee29be096a2354aafdb4d737cb01 --- /dev/null +++ b/tests/expected/evalp/CRPS_FROM_ECDF.csv @@ -0,0 +1 @@ +262.615225902479 diff --git a/tests/expected/evalp/CRPS_FROM_QS.csv b/tests/expected/evalp/CRPS_FROM_QS.csv new file mode 100644 index 0000000000000000000000000000000000000000..c449874f5f2b510060ab8d33d4f09642f0f2221a --- /dev/null +++ b/tests/expected/evalp/CRPS_FROM_QS.csv @@ -0,0 +1 @@ +252.9569186533230 diff --git a/tests/expected/evalp/CSI.csv b/tests/expected/evalp/CSI.csv new file mode 100644 index 0000000000000000000000000000000000000000..ed9f3d5d8e6be62028c0a624937bc8b71aeac154 --- /dev/null +++ b/tests/expected/evalp/CSI.csv @@ -0,0 +1,52 @@ +0.4469453376206,0.3311897106109,0.2733118971061,nan +0.7792207792208,0.8108108108108,0.7032967032967,nan +0.7792207792208,0.8108108108108,0.7032967032967,nan +0.7792207792208,0.8108108108108,0.7032967032967,nan +0.7792207792208,0.8108108108108,0.7032967032967,nan 
+0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan 
+0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7843137254902,0.8108108108108,0.7032967032967,nan +0.7712418300654,0.8108108108108,0.7032967032967,nan +0.7712418300654,0.8108108108108,0.7032967032967,nan +0.7712418300654,0.8018018018018,0.7111111111111,nan diff --git a/tests/expected/evalp/DS.csv b/tests/expected/evalp/DS.csv new file mode 100644 index 0000000000000000000000000000000000000000..bf3ba37e57fefc74f9d488499775accd02e3a2d0 --- /dev/null +++ b/tests/expected/evalp/DS.csv @@ -0,0 +1 @@ +148.7901639344262 diff --git a/tests/expected/evalp/ES.csv b/tests/expected/evalp/ES.csv new file mode 100644 index 0000000000000000000000000000000000000000..63564433c01ac6a3420db76dbcf9b76a6fc6ffb3 --- /dev/null +++ b/tests/expected/evalp/ES.csv @@ -0,0 +1 @@ +587.2254970444062 diff --git a/tests/expected/evalp/FAR.csv b/tests/expected/evalp/FAR.csv new file mode 100644 index 0000000000000000000000000000000000000000..7f82d19402209cdc1006165e106dc64febbc30d5 --- /dev/null +++ b/tests/expected/evalp/FAR.csv @@ -0,0 +1,52 @@ +0.5530546623794,0.6688102893891,0.7266881028939,nan +0.1111111111111,0.0816326530612,0.0857142857143,nan +0.1111111111111,0.0816326530612,0.0857142857143,nan +0.1111111111111,0.0816326530612,0.0857142857143,nan +0.1111111111111,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan 
+0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1044776119403,0.0816326530612,0.0857142857143,nan +0.1060606060606,0.0816326530612,0.0857142857143,nan 
+0.1060606060606,0.0816326530612,0.0857142857143,nan +0.1060606060606,0.0824742268041,0.0724637681159,nan diff --git a/tests/expected/evalp/POD.csv b/tests/expected/evalp/POD.csv new file mode 100644 index 0000000000000000000000000000000000000000..80667c12ae32a3ce3b15b0f4ad6f17cefa1a9d74 --- /dev/null +++ b/tests/expected/evalp/POD.csv @@ -0,0 +1,52 @@ +1.0000000000000,1.0000000000000,1.0000000000000,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan 
+0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8633093525180,0.8737864077670,0.7529411764706,nan +0.8489208633094,0.8737864077670,0.7529411764706,nan +0.8489208633094,0.8737864077670,0.7529411764706,nan +0.8489208633094,0.8640776699029,0.7529411764706,nan diff --git a/tests/expected/evalp/POFD.csv b/tests/expected/evalp/POFD.csv new file mode 100644 index 0000000000000000000000000000000000000000..d3007cb09d09716c216dece30c97b68dd9752c3b --- /dev/null +++ b/tests/expected/evalp/POFD.csv @@ -0,0 +1,52 @@ +1.0000000000000,1.0000000000000,1.0000000000000,nan +0.0872093023256,0.0384615384615,0.0265486725664,nan +0.0872093023256,0.0384615384615,0.0265486725664,nan +0.0872093023256,0.0384615384615,0.0265486725664,nan +0.0872093023256,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan 
+0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan 
+0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0265486725664,nan +0.0813953488372,0.0384615384615,0.0221238938053,nan diff --git a/tests/expected/evalp/QS.csv b/tests/expected/evalp/QS.csv new file mode 100644 index 0000000000000000000000000000000000000000..10f11b5a009026f2a9fc07994ed57e0854f3e690 --- /dev/null +++ b/tests/expected/evalp/QS.csv @@ -0,0 +1 @@ +345.9157803611179,345.0692555033388,343.1293593865944,340.7098689092258,338.2815978233983,335.9735345040806,333.5551570615883,330.3324264160278,327.3335394509029,324.3259955478602,321.1900816225579,318.1751174870145,315.1221864951768,311.9720504575810,308.6449418748451,305.6121691813011,302.1695523126391,298.4459559732869,294.9746475389559,291.2738065792731,287.7245857036857,284.1019045263419,280.2355923818945,276.2186495176851,272.5014840465003,268.6527331189711,264.7401681919366,260.8558001484045,256.9032896364086,252.9262923571603,248.9312391788272,244.9863962404153,240.6629977739305,236.3289636408610,232.0897848132574,227.3870887954491,222.9760079149148,218.6999752658918,214.0996784565916,209.6725204056392,205.1895869403907,200.3957457333661,195.2372000989366,190.0801385110065,185.3842443729902,180.6178580262183,174.5832302745488,169.1540934949294,163.1109324758844,156.2747959436064,147.5753153598814 diff --git a/tests/expected/evalp/RANK_HIST.csv b/tests/expected/evalp/RANK_HIST.csv new file mode 100644 index 0000000000000000000000000000000000000000..c35d0094a8725513a0e888bd151ab70f0fcb9b60 --- /dev/null +++ b/tests/expected/evalp/RANK_HIST.csv @@ -0,0 +1 @@ 
+0.6077170418006,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0000000000000,0.0064308681672,0.0000000000000,0.0000000000000,0.0032154340836,0.0064308681672,0.0000000000000,0.0000000000000,0.0000000000000,0.0032154340836,0.0000000000000,0.0000000000000,0.0032154340836,0.0032154340836,0.0032154340836,0.0000000000000,0.0064308681672,0.3440514469453 diff --git a/tests/expected/evalp/REL_DIAG.csv b/tests/expected/evalp/REL_DIAG.csv new file mode 100644 index 0000000000000000000000000000000000000000..3568332be044fda594e68d46f3c9ac93bfad0af4 --- /dev/null +++ b/tests/expected/evalp/REL_DIAG.csv @@ -0,0 +1,208 @@ +0.0000000000000,0.1060606060606,132.0000000000000 +0.0196078431373,0.0000000000000,0.0000000000000 +0.0392156862745,0.0000000000000,0.0000000000000 +0.0588235294118,0.0000000000000,2.0000000000000 +0.0784313725490,0.0000000000000,0.0000000000000 +0.0980392156863,0.0000000000000,0.0000000000000 +0.1176470588235,0.0000000000000,0.0000000000000 +0.1372549019608,0.0000000000000,0.0000000000000 +0.1568627450980,0.0000000000000,0.0000000000000 +0.1764705882353,0.0000000000000,0.0000000000000 +0.1960784313725,0.0000000000000,0.0000000000000 +0.2156862745098,0.0000000000000,0.0000000000000 +0.2352941176471,0.0000000000000,0.0000000000000 +0.2549019607843,0.0000000000000,0.0000000000000 +0.2745098039216,0.0000000000000,0.0000000000000 +0.2941176470588,0.0000000000000,0.0000000000000 +0.3137254901961,0.0000000000000,0.0000000000000 +0.3333333333333,0.0000000000000,0.0000000000000 
+0.3529411764706,0.0000000000000,0.0000000000000 +0.3725490196078,0.0000000000000,0.0000000000000 +0.3921568627451,0.0000000000000,0.0000000000000 +0.4117647058824,0.0000000000000,0.0000000000000 +0.4313725490196,0.0000000000000,0.0000000000000 +0.4509803921569,0.0000000000000,0.0000000000000 +0.4705882352941,0.0000000000000,0.0000000000000 +0.4901960784314,0.0000000000000,0.0000000000000 +0.5098039215686,0.0000000000000,0.0000000000000 +0.5294117647059,0.0000000000000,0.0000000000000 +0.5490196078431,0.0000000000000,0.0000000000000 +0.5686274509804,0.0000000000000,0.0000000000000 +0.5882352941176,0.0000000000000,0.0000000000000 +0.6078431372549,0.0000000000000,0.0000000000000 +0.6274509803922,0.0000000000000,0.0000000000000 +0.6470588235294,0.0000000000000,0.0000000000000 +0.6666666666667,0.0000000000000,0.0000000000000 +0.6862745098039,0.0000000000000,0.0000000000000 +0.7058823529412,0.0000000000000,0.0000000000000 +0.7254901960784,0.0000000000000,0.0000000000000 +0.7450980392157,0.0000000000000,0.0000000000000 +0.7647058823529,0.0000000000000,0.0000000000000 +0.7843137254902,0.0000000000000,0.0000000000000 +0.8039215686275,0.0000000000000,0.0000000000000 +0.8235294117647,0.0000000000000,0.0000000000000 +0.8431372549020,0.0000000000000,0.0000000000000 +0.8627450980392,0.0000000000000,0.0000000000000 +0.8823529411765,0.0000000000000,0.0000000000000 +0.9019607843137,0.0000000000000,0.0000000000000 +0.9215686274510,1.0000000000000,1.0000000000000 +0.9411764705882,0.0000000000000,0.0000000000000 +0.9607843137255,0.0000000000000,0.0000000000000 +0.9803921568627,0.0000000000000,0.0000000000000 +1.0000000000000,0.8920454545455,176.0000000000000 +0.0000000000000,0.0842105263158,95.0000000000000 +0.0196078431373,0.0000000000000,1.0000000000000 +0.0392156862745,0.0000000000000,0.0000000000000 +0.0588235294118,0.0000000000000,0.0000000000000 +0.0784313725490,0.0000000000000,0.0000000000000 +0.0980392156863,0.0000000000000,0.0000000000000 
+0.1176470588235,0.0000000000000,0.0000000000000 +0.1372549019608,0.0000000000000,0.0000000000000 +0.1568627450980,0.0000000000000,0.0000000000000 +0.1764705882353,0.0000000000000,0.0000000000000 +0.1960784313725,0.0000000000000,0.0000000000000 +0.2156862745098,0.0000000000000,0.0000000000000 +0.2352941176471,0.0000000000000,0.0000000000000 +0.2549019607843,0.0000000000000,0.0000000000000 +0.2745098039216,0.0000000000000,0.0000000000000 +0.2941176470588,0.0000000000000,0.0000000000000 +0.3137254901961,0.0000000000000,0.0000000000000 +0.3333333333333,0.0000000000000,0.0000000000000 +0.3529411764706,0.0000000000000,0.0000000000000 +0.3725490196078,0.0000000000000,0.0000000000000 +0.3921568627451,0.0000000000000,0.0000000000000 +0.4117647058824,0.0000000000000,0.0000000000000 +0.4313725490196,0.0000000000000,0.0000000000000 +0.4509803921569,0.0000000000000,0.0000000000000 +0.4705882352941,0.0000000000000,0.0000000000000 +0.4901960784314,0.0000000000000,0.0000000000000 +0.5098039215686,0.0000000000000,0.0000000000000 +0.5294117647059,0.0000000000000,0.0000000000000 +0.5490196078431,0.0000000000000,0.0000000000000 +0.5686274509804,0.0000000000000,0.0000000000000 +0.5882352941176,0.0000000000000,0.0000000000000 +0.6078431372549,0.0000000000000,0.0000000000000 +0.6274509803922,0.0000000000000,0.0000000000000 +0.6470588235294,0.0000000000000,0.0000000000000 +0.6666666666667,0.0000000000000,0.0000000000000 +0.6862745098039,0.0000000000000,0.0000000000000 +0.7058823529412,0.0000000000000,0.0000000000000 +0.7254901960784,0.0000000000000,0.0000000000000 +0.7450980392157,0.0000000000000,0.0000000000000 +0.7647058823529,0.0000000000000,0.0000000000000 +0.7843137254902,0.0000000000000,0.0000000000000 +0.8039215686275,0.0000000000000,0.0000000000000 +0.8235294117647,0.0000000000000,0.0000000000000 +0.8431372549020,0.0000000000000,0.0000000000000 +0.8627450980392,0.0000000000000,0.0000000000000 +0.8823529411765,0.0000000000000,0.0000000000000 
+0.9019607843137,0.0000000000000,0.0000000000000 +0.9215686274510,0.0000000000000,0.0000000000000 +0.9411764705882,0.0000000000000,0.0000000000000 +0.9607843137255,0.0000000000000,0.0000000000000 +0.9803921568627,0.0000000000000,0.0000000000000 +1.0000000000000,0.9302325581395,215.0000000000000 +0.0000000000000,0.0724637681159,69.0000000000000 +0.0196078431373,1.0000000000000,1.0000000000000 +0.0392156862745,0.0000000000000,0.0000000000000 +0.0588235294118,0.0000000000000,0.0000000000000 +0.0784313725490,0.0000000000000,0.0000000000000 +0.0980392156863,0.0000000000000,0.0000000000000 +0.1176470588235,0.0000000000000,0.0000000000000 +0.1372549019608,0.0000000000000,0.0000000000000 +0.1568627450980,0.0000000000000,0.0000000000000 +0.1764705882353,0.0000000000000,0.0000000000000 +0.1960784313725,0.0000000000000,0.0000000000000 +0.2156862745098,0.0000000000000,0.0000000000000 +0.2352941176471,0.0000000000000,0.0000000000000 +0.2549019607843,0.0000000000000,0.0000000000000 +0.2745098039216,0.0000000000000,0.0000000000000 +0.2941176470588,0.0000000000000,0.0000000000000 +0.3137254901961,0.0000000000000,0.0000000000000 +0.3333333333333,0.0000000000000,0.0000000000000 +0.3529411764706,0.0000000000000,0.0000000000000 +0.3725490196078,0.0000000000000,0.0000000000000 +0.3921568627451,0.0000000000000,0.0000000000000 +0.4117647058824,0.0000000000000,0.0000000000000 +0.4313725490196,0.0000000000000,0.0000000000000 +0.4509803921569,0.0000000000000,0.0000000000000 +0.4705882352941,0.0000000000000,0.0000000000000 +0.4901960784314,0.0000000000000,0.0000000000000 +0.5098039215686,0.0000000000000,0.0000000000000 +0.5294117647059,0.0000000000000,0.0000000000000 +0.5490196078431,0.0000000000000,0.0000000000000 +0.5686274509804,0.0000000000000,0.0000000000000 +0.5882352941176,0.0000000000000,0.0000000000000 +0.6078431372549,0.0000000000000,0.0000000000000 +0.6274509803922,0.0000000000000,0.0000000000000 +0.6470588235294,0.0000000000000,0.0000000000000 
+0.6666666666667,0.0000000000000,0.0000000000000 +0.6862745098039,0.0000000000000,0.0000000000000 +0.7058823529412,0.0000000000000,0.0000000000000 +0.7254901960784,0.0000000000000,0.0000000000000 +0.7450980392157,0.0000000000000,0.0000000000000 +0.7647058823529,0.0000000000000,0.0000000000000 +0.7843137254902,0.0000000000000,0.0000000000000 +0.8039215686275,0.0000000000000,0.0000000000000 +0.8235294117647,0.0000000000000,0.0000000000000 +0.8431372549020,0.0000000000000,0.0000000000000 +0.8627450980392,0.0000000000000,0.0000000000000 +0.8823529411765,0.0000000000000,0.0000000000000 +0.9019607843137,0.0000000000000,0.0000000000000 +0.9215686274510,0.0000000000000,0.0000000000000 +0.9411764705882,0.0000000000000,0.0000000000000 +0.9607843137255,0.0000000000000,0.0000000000000 +0.9803921568627,0.0000000000000,0.0000000000000 +1.0000000000000,0.9128630705394,241.0000000000000 +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan +nan,nan,nan diff --git a/tests/expected/evalp/ROCSS.csv b/tests/expected/evalp/ROCSS.csv new file mode 100644 index 0000000000000000000000000000000000000000..ab68dea18a015816e96dbd71aca33f6bbbd64567 --- /dev/null +++ b/tests/expected/evalp/ROCSS.csv @@ -0,0 +1,4 @@ +0.7108499247114 +0.8017176997760 +0.7130661114003 +nan diff --git a/tests/expected/evalp/WS.csv b/tests/expected/evalp/WS.csv new file mode 100644 index 
0000000000000000000000000000000000000000..82b07bb09227e378a17fa2a7a485f5371f36cccf --- /dev/null +++ b/tests/expected/evalp/WS.csv @@ -0,0 +1 @@ +764.4471750114835,2578.1382636655953 diff --git a/tests/expected/evalp/WSS.csv b/tests/expected/evalp/WSS.csv new file mode 100644 index 0000000000000000000000000000000000000000..fd929dabd8f58354be587455dcbd4402d6a9f83d --- /dev/null +++ b/tests/expected/evalp/WSS.csv @@ -0,0 +1 @@ +0.6621887740287,0.4360388849930 diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 14358994aa579e404e4eb767f9dc004da37103b6..8e0c2497e7c6647b3be44330e85cfd29b192c093 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -1,12 +1,22 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + #include <fstream> #include <vector> #include <tuple> #include <array> +#include <string> +#include <unordered_map> + #include <gtest/gtest.h> #include <xtensor/xtensor.hpp> +#include <xtensor/xarray.hpp> #include <xtensor/xview.hpp> #include <xtensor/xmanipulation.hpp> +#include <xtensor/xmath.hpp> +#include <xtensor/xsort.hpp> #include <xtensor/xcsv.hpp> #include "evalhyd/evald.hpp" @@ -17,23 +27,54 @@ using namespace xt::placeholders; // required for `_` to work + +std::vector<std::string> all_metrics_d = { + "MAE", "MARE", "MSE", "RMSE", + "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", + // --------------------------------------------------------------------- + // TODO: bring back when `xt::argsort` supports stable sorting + // so that the r_spearman component of KGENP and KGENP_D + // yields consistent results across compilers + // https://github.com/xtensor-stack/xtensor/issues/2677 + // "KGENP", "KGENP_D", + // --------------------------------------------------------------------- + "CONT_TBL" +}; + std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d() { // read in data 
std::ifstream ifs; - ifs.open(EVALHYD_DATA_DIR "/q_obs.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs.csv"); xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs); ifs.close(); - ifs.open(EVALHYD_DATA_DIR "/q_prd.csv"); - xt::xtensor<double, 2> predicted = xt::view( - xt::load_csv<double>(ifs), xt::range(0, 5), xt::all() - ); + ifs.open(EVALHYD_DATA_DIR "/data/q_prd.csv"); + xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs); ifs.close(); return std::make_tuple(observed, predicted); } +std::unordered_map<std::string, xt::xarray<double>> load_expected_d() +{ + // read in expected results + std::ifstream ifs; + std::unordered_map<std::string, xt::xarray<double>> expected; + + for (const auto& metric : all_metrics_d) + { + ifs.open(EVALHYD_DATA_DIR "/expected/evald/" + metric + ".csv"); + expected[metric] = xt::view( + xt::squeeze(xt::load_csv<double>(ifs)), + xt::all(), xt::newaxis(), xt::newaxis() + ); + ifs.close(); + } + + return expected; +} + TEST(DeterministTests, TestMetrics) { // read in data @@ -41,44 +82,35 @@ TEST(DeterministTests, TestMetrics) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + + // read in expected results + auto expected = load_expected_d(); + // compute scores (with 2D tensors) - std::vector<xt::xarray<double>> metrics = + std::vector<xt::xarray<double>> results = evalhyd::evald( - observed, predicted, {"RMSE", "NSE", "KGE", "KGEPRIME"} + observed, predicted, all_metrics_d, thresholds, "high" ); - // check results on all metrics - xt::xtensor<double, 3> rmse = - {{{777.034272}}, - {{776.878479}}, - {{777.800217}}, - {{778.151082}}, - {{778.61487 }}}; - EXPECT_TRUE(xt::allclose(metrics[0], rmse)); - - xt::xtensor<double, 3> nse = - {{{0.718912}}, - {{0.719025}}, - {{0.718358}}, - {{0.718104}}, - {{0.717767}}}; - EXPECT_TRUE(xt::allclose(metrics[1], nse)); - - 
xt::xtensor<double, 3> kge = - {{{0.748088}}, - {{0.746106}}, - {{0.744111}}, - {{0.743011}}, - {{0.741768}}}; - EXPECT_TRUE(xt::allclose(metrics[2], kge)); - - xt::xtensor<double, 3> kgeprime = - {{{0.813141}}, - {{0.812775}}, - {{0.812032}}, - {{0.811787}}, - {{0.811387}}}; - EXPECT_TRUE(xt::allclose(metrics[3], kgeprime)); + // check results + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + if (all_metrics_d[m] == "CONT_TBL") + { + // /!\ stacked-up thresholds in CSV file because 5D metric, + // so need to resize array + expected[all_metrics_d[m]].resize( + {predicted.shape(0), std::size_t {1}, std::size_t {1}, + thresholds.shape(1), std::size_t {4}} + ); + } + + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[all_metrics_d[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; + } } TEST(DeterministTests, TestTransform) @@ -88,50 +120,89 @@ TEST(DeterministTests, TestTransform) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // compute and check results on square-rooted streamflow series - std::vector<xt::xarray<double>> metrics = - evalhyd::evald(observed, predicted, {"NSE"}, "sqrt"); + std::vector<xt::xarray<double>> results_sqrt = + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "sqrt"); - xt::xtensor<double, 3> nse_sqrt = - {{{0.882817}}, - {{0.883023}}, - {{0.883019}}, - {{0.883029}}, - {{0.882972}}}; - EXPECT_TRUE(xt::allclose(metrics[0], nse_sqrt)); + xt::xtensor<double, 2> obs_sqrt = xt::sqrt(observed); + xt::xtensor<double, 2> prd_sqrt = xt::sqrt(predicted); + xt::xtensor<double, 2> thr_sqrt = xt::sqrt(thresholds); + + std::vector<xt::xarray<double>> results_sqrt_ = + evalhyd::evald(obs_sqrt, prd_sqrt, all_metrics_d, + thr_sqrt, "high"); + + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + 
EXPECT_TRUE(xt::all(xt::isclose( + results_sqrt[m], results_sqrt_[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; + } // compute and check results on inverted streamflow series - metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv"); + std::vector<xt::xarray<double>> results_inv = + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "inv"); + + xt::xtensor<double, 2> epsilon = xt::mean(observed, {1}, xt::keep_dims) * 0.01; + xt::xtensor<double, 2> obs_inv = 1. / (observed + epsilon); + xt::xtensor<double, 2> prd_inv = 1. / (predicted + epsilon); + xt::xtensor<double, 2> thr_inv = 1. / (thresholds + epsilon); - xt::xtensor<double, 3> nse_inv = - {{{0.737323}}, - {{0.737404}}, - {{0.737429}}, - {{0.737546}}, - {{0.737595}}}; - EXPECT_TRUE(xt::allclose(metrics[0], nse_inv)); + std::vector<xt::xarray<double>> results_inv_ = + evalhyd::evald(obs_inv, prd_inv, all_metrics_d, + thr_inv, "high"); + + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results_inv[m], results_inv_[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; + } // compute and check results on square-rooted streamflow series - metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log"); + std::vector<xt::xarray<double>> results_log = + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "log"); - xt::xtensor<double, 3> nse_log = - {{{0.893344}}, - {{0.893523}}, - {{0.893585}}, - {{0.893758}}, - {{0.893793}}}; - EXPECT_TRUE(xt::allclose(metrics[0], nse_log)); + xt::xtensor<double, 2> obs_log = xt::log(observed + epsilon); + xt::xtensor<double, 2> prd_log = xt::log(predicted + epsilon); + xt::xtensor<double, 2> thr_log = xt::log(thresholds + epsilon); + + std::vector<xt::xarray<double>> results_log_ = + evalhyd::evald(obs_log, prd_log, all_metrics_d, + thr_log, "high"); + + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + 
EXPECT_TRUE(xt::all(xt::isclose( + results_log[m], results_log_[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; + } // compute and check results on power-transformed streamflow series - metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2); + std::vector<xt::xarray<double>> results_pow = + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "pow", 0.2); + + xt::xtensor<double, 2> obs_pow = xt::pow(observed, 0.2); + xt::xtensor<double, 2> prd_pow = xt::pow(predicted, 0.2); + xt::xtensor<double, 2> thr_pow = xt::pow(thresholds, 0.2); - xt::xtensor<double, 3> nse_pow = - {{{0.899207}}, - {{0.899395}}, - {{0.899451}}, - {{0.899578}}, - {{0.899588}}}; - EXPECT_TRUE(xt::allclose(metrics[0], nse_pow)); + std::vector<xt::xarray<double>> results_pow_ = + evalhyd::evald(obs_pow, prd_pow, all_metrics_d, + thr_pow, "high"); + + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results_pow[m], results_pow_[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; + } } @@ -142,151 +213,182 @@ TEST(DeterministTests, TestMasks) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // generate temporal subset by dropping 20 first time steps - xt::xtensor<double, 2> masks = - xt::ones<bool>({std::size_t {1}, std::size_t {observed.size()}}); - xt::view(masks, 0, xt::range(0, 20)) = 0; + xt::xtensor<bool, 3> masks = + xt::ones<bool>({std::size_t {predicted.shape(0)}, + std::size_t {1}, + std::size_t {observed.size()}}); + xt::view(masks, xt::all(), 0, xt::range(0, 20)) = 0; // compute scores using masks to subset whole record - std::vector<std::string> metrics = - {"RMSE", "NSE", "KGE", "KGEPRIME"}; - std::vector<xt::xarray<double>> metrics_masked = - evalhyd::evald(observed, predicted, metrics, "none", 1, -9, 
masks); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks); // compute scores on pre-computed subset of whole record xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); std::vector<xt::xarray<double>> metrics_subset = - evalhyd::evald(obs, prd, metrics); + evalhyd::evald(obs, prd, all_metrics_d, thresholds, "high"); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m])) - << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_masked[m], metrics_subset[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } TEST(DeterministTests, TestMaskingConditions) { - std::vector<std::string> metrics = - {"RMSE", "NSE", "KGE", "KGEPRIME"}; - // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // generate dummy empty masks required to access next optional argument - xt::xtensor<bool, 2> masks; + xt::xtensor<bool, 3> masks; // conditions on streamflow values _________________________________________ // compute scores using masking conditions on streamflow to subset whole record - xt::xtensor<std::array<char, 32>, 1> q_conditions = { - std::array<char, 32> {"q_obs{<2000,>3000}"} + xt::xtensor<std::array<char, 32>, 2> q_conditions = { + {std::array<char, 32>{"q_obs{<2000,>3000}"}} }; + q_conditions = xt::repeat(q_conditions, predicted.shape(0), 0); std::vector<xt::xarray<double>> metrics_q_conditioned = 
evalhyd::evald( - observed, predicted, metrics, - "none", 1, -9, masks, q_conditions + observed, predicted, all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, q_conditions ); // compute scores using "NaN-ed" time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned = evalhyd::evald( - xt::where((observed < 2000) | (observed > 3000), observed, NAN), + xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)), predicted, - metrics + all_metrics_d, + thresholds, + "high" ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::allclose( - metrics_q_conditioned[m], metrics_q_preconditioned[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_q_conditioned[m], metrics_q_preconditioned[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on streamflow statistics _____________________________________ // compute scores using masking conditions on streamflow to subset whole record - xt::xtensor<std::array<char, 32>, 1> q_conditions_ ={ - std::array<char, 32> {"q_obs{>=mean}"} + xt::xtensor<std::array<char, 32>, 2> q_conditions_ = { + {std::array<char, 32>{"q_obs{>=mean}"}} }; + q_conditions_ = xt::repeat(q_conditions_, predicted.shape(0), 0); double mean = xt::mean(observed, {1})(); std::vector<xt::xarray<double>> metrics_q_conditioned_ = evalhyd::evald( - observed, predicted, metrics, - "none", 1, -9, masks, q_conditions_ + observed, predicted, all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, q_conditions_ ); // compute scores using "NaN-ed" 
time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned_ = evalhyd::evald( - xt::where(observed >= mean, observed, NAN), + xt::eval(xt::where(observed >= mean, observed, NAN)), predicted, - metrics + all_metrics_d, + thresholds, + "high" ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::allclose( - metrics_q_conditioned[m], metrics_q_preconditioned[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_q_conditioned_[m], metrics_q_preconditioned_[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on temporal indices __________________________________________ // compute scores using masking conditions on time indices to subset whole record - xt::xtensor<std::array<char, 32>, 1> t_conditions = { - std::array<char, 32> {"t{0,1,2,3,4,5:97,97,98,99}"} + xt::xtensor<std::array<char, 32>, 2> t_conditions = { + {std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}} }; + t_conditions = xt::repeat(t_conditions, predicted.shape(0), 0); std::vector<xt::xarray<double>> metrics_t_conditioned = evalhyd::evald( - observed, predicted, metrics, - "none", 1, -9, masks, t_conditions + observed, predicted, all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, t_conditions ); // compute scores on already subset time series std::vector<xt::xarray<double>> metrics_t_subset = evalhyd::evald( - xt::view(observed, xt::all(), xt::range(0, 100)), - xt::view(predicted, xt::all(), xt::range(0, 100)), - metrics + xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))), + xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))), + all_metrics_d, + thresholds, + "high" ); // check results are identical - for (int m = 
0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::allclose( - metrics_t_conditioned[m], metrics_t_subset[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_t_conditioned[m], metrics_t_subset[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } TEST(DeterministTests, TestMissingData) { - std::vector<std::string> metrics = - {"RMSE", "NSE", "KGE", "KGEPRIME"}; - // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + predicted = xt::view(predicted, xt::range(0, 5), xt::all()); + + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); // add some missing observations artificially by assigning NaN values xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; @@ -299,68 +401,76 @@ TEST(DeterministTests, TestMissingData) // compute metrics with observations containing NaN values std::vector<xt::xarray<double>> metrics_nan = - evalhyd::evald(observed, predicted, metrics); + evalhyd::evald(observed, predicted, all_metrics_d, thresholds, "high"); - for (int m = 0; m < metrics.size(); m++) { - for (int p = 0; p < predicted.shape(0); p++) { + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + for (std::size_t p = 0; p < predicted.shape(0); p++) + { // compute metrics on subset of observations and predictions (i.e. 
// eliminating region with NaN in observations or predictions manually) xt::xtensor<double, 1> obs = xt::view(observed, 0, xt::range(20+(3*(p+1)), _)); xt::xtensor<double, 1> prd = xt::view(predicted, p, xt::range(20+(3*(p+1)), _)); + xt::xtensor<double, 1> thr = + xt::view(thresholds, p); std::vector<xt::xarray<double>> metrics_sbs = - evalhyd::evald(xt::view(obs, xt::newaxis(), xt::all()), - xt::view(prd, xt::newaxis(), xt::all()), - {metrics[m]}); + evalhyd::evald( + xt::eval(xt::view(obs, xt::newaxis(), xt::all())), + xt::eval(xt::view(prd, xt::newaxis(), xt::all())), + {all_metrics_d[m]}, + xt::eval(xt::view(thr, xt::newaxis(), xt::all())), + "high" + ); // compare to check results are the same - EXPECT_TRUE( - xt::allclose( - xt::view(metrics_nan[m], p), - metrics_sbs[0] - ) - ) << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + xt::view(metrics_nan[m], p), metrics_sbs[0], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } } TEST(DeterministTests, TestBootstrap) { - std::vector<std::string> metrics = - {"RMSE", "NSE", "KGE", "KGEPRIME"}; - // read in data std::ifstream ifs; - ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); ifs.close(); std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); - ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); ifs.close(); - ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // compute metrics via bootstrap std::unordered_map<std::string, int> 
bootstrap = {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = evalhyd::evald( - xt::view(observed, xt::newaxis(), xt::all()), + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, - metrics, - "none", // transform - 1, // exponent - -9, // epsilon - {}, // t_msk - {}, // m_cdt + all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + xt::xtensor<bool, 3>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap, datetimes ); @@ -378,18 +488,191 @@ TEST(DeterministTests, TestBootstrap) std::vector<xt::xarray<double>> metrics_rep = evalhyd::evald( - xt::view(observed_x3, xt::newaxis(), xt::all()), + xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())), predicted_x3, - metrics + all_metrics_d, + thresholds, + "high" ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::allclose( - metrics_bts[m], metrics_rep[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_bts[m], metrics_rep[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } + +TEST(DeterministTests, TestBootstrapSummary) +{ + // read in data + std::ifstream ifs; + + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); + xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); + ifs.close(); + std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); + + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); + xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); + ifs.close(); + + ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv"); + xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); + ifs.close(); + + xt::xtensor<double, 2> thresholds = 
{{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + + // compute metrics via bootstrap with raw summary + std::unordered_map<std::string, int> bootstrap_0 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; + + std::vector<xt::xarray<double>> metrics_raw = + evalhyd::evald( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + predicted, + all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + xt::xtensor<bool, 3>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_0, + datetimes + ); + + // compute metrics via bootstrap with mean and standard deviation summary + std::unordered_map<std::string, int> bootstrap_1 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 1}}; + + std::vector<xt::xarray<double>> metrics_mas = + evalhyd::evald( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + predicted, + all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + xt::xtensor<bool, 3>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_1, + datetimes + ); + + // check results are identical + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + // mean + EXPECT_TRUE(xt::all(xt::isclose( + xt::mean(metrics_raw[m], {2}), + xt::view(metrics_mas[m], xt::all(), xt::all(), 0), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on mean"; + // standard deviation + EXPECT_TRUE(xt::all(xt::isclose( + xt::stddev(metrics_raw[m], {2}), + xt::view(metrics_mas[m], xt::all(), xt::all(), 1), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on standard deviation"; + } + + // compute metrics via bootstrap with quantiles summary + std::unordered_map<std::string, int> 
bootstrap_2 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 2}}; + + std::vector<xt::xarray<double>> metrics_qtl = + evalhyd::evald( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + predicted, + all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + xt::xtensor<bool, 3>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_2, + datetimes + ); + + // check results are identical + for (std::size_t m = 0; m < all_metrics_d.size(); m++) + { + // quantiles + std::vector<double> quantiles = {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}; + std::size_t i = 0; + + for (auto q : quantiles) + { + EXPECT_TRUE(xt::all(xt::isclose( + xt::quantile(metrics_raw[m], {q}, 2), + xt::view(metrics_qtl[m], xt::all(), xt::all(), i), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on quantile " << q; + i++; + } + } +} + +TEST(DeterministTests, TestCompleteness) +{ + std::vector<std::string> diags = {"completeness"}; + + // compute metrics on series with NaN + xt::xtensor<double, 2> prd = { + { 5.3, NAN, 5.7, 2.3, 3.3, 4.1 }, + { 4.3, 4.2, 4.7, 4.3, 3.3, 2.8 }, + { 5.3, NAN, 5.7, 2.3, 3.8, NAN } + }; + + xt::xtensor<double, 2> obs = + {{ 4.7, 4.3, NAN, 2.7, 4.1, 5.0 }}; + + xt::xtensor<bool, 3> msk = { + {{ true, true, true, false, true, true }, + { true, true, true, true, true, true }}, + {{ true, true, true, true, true, false }, + { true, true, true, true, true, true }}, + {{ true, true, true, false, false, true }, + { true, true, true, true, true, true }} + }; + + std::vector<xt::xarray<double>> results = + evalhyd::evald( + obs, + prd, + std::vector<std::string> {}, // metrics + xt::xtensor<double, 2>({}), // thresholds + xtl::missing<const std::string>(), // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + 
msk, // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + xtl::missing<const std::unordered_map<std::string, int>>(), // bootstrap + {}, // dts + xtl::missing<const int>(), // seed + diags + ); + + // check that numerical results are identical + xt::xtensor<double, 3> expected = { + {{ 3. }, + { 4. }}, + {{ 4. }, + { 5. }}, + {{ 1. }, + { 3. }} + }; + + EXPECT_TRUE( + xt::all(xt::isclose(results[0], expected, 1e-05, 1e-08, true)) + ); +} diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index 7f72a7fa6bcb9871299c8b54a0e79a862d5fc9c9..65b16cdde5c4e0b30280bed9a96a8cfea6f36ba9 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -1,10 +1,21 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + #include <fstream> #include <vector> #include <tuple> #include <array> +#include <string> +#include <unordered_map> + #include <gtest/gtest.h> + +#include <xtl/xoptional.hpp> #include <xtensor/xtensor.hpp> +#include <xtensor/xarray.hpp> #include <xtensor/xview.hpp> +#include <xtensor/xmath.hpp> #include <xtensor/xsort.hpp> #include <xtensor/xmanipulation.hpp> #include <xtensor/xcsv.hpp> @@ -17,21 +28,52 @@ using namespace xt::placeholders; // required for `_` to work + +std::vector<std::string> all_metrics_p = { + "BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "CRPS_FROM_ECDF", + "QS", "CRPS_FROM_QS", + "POD", "POFD", "FAR", "CSI", "ROCSS", + "RANK_HIST", "DS", "AS", + "CR", "AW", "AWN", "AWI", "WS", "WSS", + "ES" +}; + std::tuple<xt::xtensor<double, 1>, xt::xtensor<double, 2>> load_data_p() { // read in data std::ifstream ifs; - ifs.open(EVALHYD_DATA_DIR "/q_obs.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs)); ifs.close(); - ifs.open(EVALHYD_DATA_DIR "/q_prd.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_prd.csv"); 
xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs); ifs.close(); return std::make_tuple(observed, predicted); } +std::unordered_map<std::string, xt::xarray<double>> load_expected_p() +{ + // read in expected results + std::ifstream ifs; + std::unordered_map<std::string, xt::xarray<double>> expected; + + for (const auto& metric : all_metrics_p) + { + ifs.open(EVALHYD_DATA_DIR "/expected/evalp/" + metric + ".csv"); + expected[metric] = xt::view( + xt::squeeze(xt::load_csv<double>(ifs)), + xt::newaxis(), xt::newaxis(), xt::newaxis(), + xt::newaxis(), xt::all() + ); + ifs.close(); + } + + return expected; +} + TEST(ProbabilistTests, TestBrier) { // read in data @@ -39,57 +81,72 @@ TEST(ProbabilistTests, TestBrier) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_p(); + // read in expected results + auto expected = load_expected_p(); + // compute scores xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + std::vector<std::string> metrics = {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS"}; - std::vector<xt::xarray<double>> metrics = + std::vector<xt::xarray<double>> results = evalhyd::evalp( // shape: (sites [1], time [t]) - xt::view(observed, xt::newaxis(), xt::all()), + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), // shape: (sites [1], lead times [1], members [m], time [t]) - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - {"BS", "BSS", "BS_CRD", "BS_LBD"}, - thresholds + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + metrics, + thresholds, + "high" ); // check results - // Brier scores - xt::xtensor<double, 5> bs = - {{{{{0.10615136, 0.07395622, 0.08669186, NAN}}}}}; - EXPECT_TRUE( - xt::sum(xt::isclose(metrics[0], bs, 1e-05, 1e-08, true)) - == xt::xscalar<double>(4) - ); + for (std::size_t m = 0; m < metrics.size(); m++) + { + if ( metrics[m] == "REL_DIAG" ) + { + // /!\ stacked-up thresholds in CSV file because 7D metric, + // so 
need to resize array + expected[metrics[m]].resize( + {std::size_t {1}, std::size_t {1}, std::size_t {1}, + std::size_t {1}, thresholds.shape(1), + predicted.shape(0) + 1, std::size_t {3}} + ); + } + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} - // Brier skill scores - xt::xtensor<double, 5> bss = - {{{{{0.5705594, 0.6661165, 0.5635126, NAN}}}}}; - EXPECT_TRUE( - xt::sum(xt::isclose(metrics[1], bss, 1e-05, 1e-08, true)) - == xt::xscalar<double>(4) - ); +TEST(ProbabilistTests, TestCDF) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); - // Brier calibration-refinement decompositions - xt::xtensor<double, 6> bs_crd = - {{{{{{0.011411758, 0.1524456, 0.2471852}, - {0.005532413, 0.1530793, 0.2215031}, - {0.010139431, 0.1220601, 0.1986125}, - {NAN, NAN, NAN}}}}}}; - EXPECT_TRUE( - xt::sum(xt::isclose(metrics[2], bs_crd, 1e-05, 1e-08, true)) - == xt::xscalar<double>(12) - ); + // read in expected results + auto expected = load_expected_p(); - // Brier likelihood-base rate decompositions - xt::xtensor<double, 6> bs_lbd = - {{{{{{0.012159881, 0.1506234, 0.2446149}, - {0.008031746, 0.1473869, 0.2133114}, - {0.017191279, 0.1048221, 0.1743227}, - {NAN, NAN, NAN}}}}}}; - EXPECT_TRUE( - xt::sum(xt::isclose(metrics[3], bs_lbd, 1e-05, 1e-08, true)) - == xt::xscalar<double>(12) - ); + // compute scores + std::vector<std::string> metrics = {"CRPS_FROM_ECDF"}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [1], time [t]) + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + // shape: (sites [1], lead times [1], members [m], time [t]) + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + metrics + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], 
expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } } TEST(ProbabilistTests, TestQuantiles) @@ -99,36 +156,172 @@ TEST(ProbabilistTests, TestQuantiles) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_p(); + // read in expected results + auto expected = load_expected_p(); + // compute scores - std::vector<xt::xarray<double>> metrics = + std::vector<std::string> metrics = {"QS", "CRPS_FROM_QS"}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [1], time [t]) + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + // shape: (sites [1], lead times [1], members [m], time [t]) + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + metrics + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} + +TEST(ProbabilistTests, TestContingency) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); + + // read in expected results + auto expected = load_expected_p(); + + // compute scores + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + std::vector<std::string> metrics = {"POD", "POFD", "FAR", "CSI", "ROCSS"}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [1], time [t]) + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + // shape: (sites [1], lead times [1], members [m], time [t]) + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + metrics, + thresholds, + "low" + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} + 
+TEST(ProbabilistTests, TestRanks) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); + + // read in expected results + auto expected = load_expected_p(); + std::vector<std::string> metrics = {"RANK_HIST", "DS", "AS"}; + + // compute scores + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [1], time [t]) + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + // shape: (sites [1], lead times [1], members [m], time [t]) + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + {"RANK_HIST", "DS", "AS"}, + xt::xtensor<double, 2>({}), + "high", // events + {}, // c_lvl + xt::xtensor<bool, 4>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + xtl::missing<const std::unordered_map<std::string, int>>(), // bootstrap + {}, // dts + 7 // seed + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} + +TEST(ProbabilistTests, TestIntervals) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); + + // read in expected results + auto expected = load_expected_p(); + + // compute scores + std::vector<std::string> metrics = {"CR", "AW", "AWN", "AWI", "WS", "WSS"}; + + std::vector<xt::xarray<double>> results = evalhyd::evalp( // shape: (sites [1], time [t]) - xt::view(observed, xt::newaxis(), xt::all()), + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), // shape: (sites [1], lead times [1], members [m], time [t]) - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - {"QS", "CRPS"} + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + {"CR", "AW", "AWN", "AWI", "WS", "WSS"}, + xt::xtensor<double, 
2>({}), + "", // events + {30., 80.} // c_lvl ); // check results - // Quantile scores - xt::xtensor<double, 5> qs = - {{{{{345.91578, 345.069256, 343.129359, 340.709869, 338.281598, - 335.973535, 333.555157, 330.332426, 327.333539, 324.325996, - 321.190082, 318.175117, 315.122186, 311.97205, 308.644942, - 305.612169, 302.169552, 298.445956, 294.974648, 291.273807, - 287.724586, 284.101905, 280.235592, 276.21865, 272.501484, - 268.652733, 264.740168, 260.8558, 256.90329, 252.926292, - 248.931239, 244.986396, 240.662998, 236.328964, 232.089785, - 227.387089, 222.976008, 218.699975, 214.099678, 209.67252, - 205.189587, 200.395746, 195.2372, 190.080139, 185.384244, - 180.617858, 174.58323, 169.154093, 163.110932, 156.274796, - 147.575315}}}}}; - EXPECT_TRUE(xt::allclose(metrics[0], qs)); - - // Continuous ranked probability scores - xt::xtensor<double, 4> crps = - {{{{252.956919}}}}; - EXPECT_TRUE(xt::allclose(metrics[1], crps)); + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} + +TEST(ProbabilistTests, TestMultiVariate) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); + + // read in expected results + auto expected = load_expected_p(); + + // compute scores + std::vector<std::string> metrics = {"ES"}; + + xt::xtensor<double, 2> obs = xt::repeat( + xt::view(observed, xt::newaxis(), xt::all()), 5, 0 + ); + xt::xtensor<double, 4> prd = xt::repeat( + xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), 5, 0 + ); + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [5], time [t]) + obs, + // shape: (sites [5], lead times [1], members [m], time [t]) + prd, + metrics + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + 
results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } } TEST(ProbabilistTests, TestMasks) @@ -139,24 +332,25 @@ TEST(ProbabilistTests, TestMasks) std::tie(observed, predicted) = load_data_p(); // generate temporal subset by dropping 20 first time steps - xt::xtensor<double, 4> masks = + xt::xtensor<bool, 4> masks = xt::ones<bool>({std::size_t {1}, std::size_t {1}, std::size_t {1}, std::size_t {observed.size()}}); xt::view(masks, 0, xt::all(), 0, xt::range(0, 20)) = 0; // compute scores using masks to subset whole record xt::xtensor<double, 2> thresholds = {{690, 534, 445}}; - std::vector<std::string> metrics = - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"}; + std::vector<double> confidence_levels = {30., 80.}; std::vector<xt::xarray<double>> metrics_masked = evalhyd::evalp( // shape: (sites [1], time [t]) - xt::view(observed, xt::newaxis(), xt::all()), + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), // shape: (sites [1], lead times [1], members [m], time [t]) - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, thresholds, + "high", + confidence_levels, // shape: (sites [1], lead times [1], subsets [1], time [t]) masks ); @@ -165,26 +359,41 @@ TEST(ProbabilistTests, TestMasks) std::vector<xt::xarray<double>> metrics_subset = evalhyd::evalp( // shape: (sites [1], time [t-20]) - xt::view(observed, xt::newaxis(), xt::range(20, _)), + xt::eval(xt::view(observed, xt::newaxis(), xt::range(20, _))), // shape: (sites [1], lead times [1], members [m], time [t-20]) - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(20, _)), - metrics, - thresholds + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(20, _))), + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check results are identical - for (int m = 0; m < 
metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_p.size(); m++) { - EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m])) - << "Failure for (" << metrics[m] << ")"; + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "masked" and "subset", which + // results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + + EXPECT_TRUE(xt::all(xt::isclose(metrics_masked[m], metrics_subset[m], 1e-04, 1e-07, true))) + << "Failure for (" << all_metrics_p[m] << ")"; } } TEST(ProbabilistTests, TestMaskingConditions) { xt::xtensor<double, 2> thresholds = {{690, 534, 445}}; - std::vector<std::string> metrics = - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"}; + std::vector<double> confidence_levels = {30., 80.}; // read in data xt::xtensor<double, 1> observed_; @@ -206,28 +415,49 @@ TEST(ProbabilistTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_q_conditioned = evalhyd::evalp( - observed, - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, thresholds, - masks, q_conditions + xt::eval(observed), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels, + masks, + q_conditions ); // compute scores using "NaN-ed" time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned = evalhyd::evalp( - xt::where((observed < 2000) | (observed > 3000), observed, NAN), - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, thresholds + 
xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_p.size(); m++) { + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "conditioned" and "preconditioned", + // which results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + EXPECT_TRUE( - xt::allclose( - metrics_q_conditioned[m], metrics_q_preconditioned[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + xt::all(xt::isclose(metrics_q_conditioned[m], + metrics_q_preconditioned[m], + 1e-05, 1e-08, true)) + ) << "Failure for (" << all_metrics_p[m] << ")"; } // conditions on streamflow statistics _____________________________________ @@ -242,28 +472,49 @@ TEST(ProbabilistTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_q_conditioned_ = evalhyd::evalp( - observed, - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, thresholds, - masks, q_conditions_ + xt::eval(observed), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels, + masks, + q_conditions_ ); // compute scores using "NaN-ed" time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned_ = evalhyd::evalp( - xt::where(q_prd_mean >= median, observed, NAN), - 
xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, thresholds + xt::eval(xt::where(q_prd_mean >= median, observed, NAN)), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_p.size(); m++) { + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "conditioned" and "preconditioned", + // which results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + EXPECT_TRUE( - xt::allclose( - metrics_q_conditioned_[m], metrics_q_preconditioned_[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + xt::all(xt::isclose(metrics_q_conditioned_[m], + metrics_q_preconditioned_[m], + 1e-05, 1e-08, true)) + ) << "Failure for (" << all_metrics_p[m] << ")"; } // conditions on temporal indices __________________________________________ @@ -275,37 +526,56 @@ TEST(ProbabilistTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_t_conditioned = evalhyd::evalp( - observed, - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, thresholds, - masks, t_conditions + xt::eval(observed), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels, + masks, + t_conditions ); // compute scores on already subset time series std::vector<xt::xarray<double>> metrics_t_subset = evalhyd::evalp( - xt::view(observed, 
xt::all(), xt::range(0, 100)), - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(0, 100)), - metrics, thresholds + xt::eval(xt::view(observed_, xt::newaxis(), xt::range(0, 100))), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(0, 100))), + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_p.size(); m++) { + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "conditioned" and "subset", + // which results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + EXPECT_TRUE( - xt::allclose( - metrics_t_conditioned[m], metrics_t_subset[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + xt::all(xt::isclose(metrics_t_conditioned[m], + metrics_t_subset[m], + 1e-05, 1e-08, true)) + ) << "Failure for (" << all_metrics_p[m] << ")"; } } TEST(ProbabilistTests, TestMissingData) { - xt::xtensor<double, 2> thresholds - {{ 4., 5. }}; - std::vector<std::string> metrics = - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"}; + xt::xtensor<double, 2> thresholds = {{ 4., 5. 
}}; + std::vector<double> confidence_levels = {30., 80.}; // compute metrics on series with NaN xt::xtensor<double, 4> forecast_nan {{ @@ -326,8 +596,10 @@ TEST(ProbabilistTests, TestMissingData) evalhyd::evalp( observed_nan, forecast_nan, - metrics, - thresholds + all_metrics_p, + thresholds, + "high", + confidence_levels ); // compute metrics on manually subset series (one leadtime at a time) @@ -345,8 +617,10 @@ TEST(ProbabilistTests, TestMissingData) evalhyd::evalp( observed_pp1, forecast_pp1, - metrics, - thresholds + all_metrics_p, + thresholds, + "high", + confidence_levels ); xt::xtensor<double, 4> forecast_pp2 {{ @@ -363,50 +637,49 @@ TEST(ProbabilistTests, TestMissingData) evalhyd::evalp( observed_pp2, forecast_pp2, - metrics, - thresholds + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check that numerical results are identical - for (int m = 0; m < metrics.size(); m++) { + for (std::size_t m = 0; m < all_metrics_p.size(); m++) + { // for leadtime 1 EXPECT_TRUE( - xt::allclose( - xt::view(metrics_nan[m], xt::all(), 0), - xt::view(metrics_pp1[m], xt::all(), 0) - ) - ) << "Failure for (" << metrics[m] << ", " << "leadtime 1)"; + xt::all(xt::isclose(xt::view(metrics_nan[m], xt::all(), 0), + xt::view(metrics_pp1[m], xt::all(), 0), + 1e-05, 1e-08, true)) + ) << "Failure for (" << all_metrics_p[m] << ", " << "leadtime 1)"; // for leadtime 2 EXPECT_TRUE( - xt::allclose( - xt::view(metrics_nan[m], xt::all(), 1), - xt::view(metrics_pp2[m], xt::all(), 0) - ) - ) << "Failure for (" << metrics[m] << ", " << "leadtime 2)"; + xt::all(xt::isclose(xt::view(metrics_nan[m], xt::all(), 1), + xt::view(metrics_pp2[m], xt::all(), 0), + 1e-05, 1e-08, true)) + ) << "Failure for (" << all_metrics_p[m] << ", " << "leadtime 2)"; } } TEST(ProbabilistTests, TestBootstrap) { - xt::xtensor<double, 2> thresholds - {{ 33.87, 55.67 }}; - std::vector<std::string> metrics = - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"}; + xt::xtensor<double, 2> thresholds = {{ 33.87, 
55.67 }}; + std::vector<double> confidence_levels = {30., 80.}; // read in data std::ifstream ifs; - ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); ifs.close(); std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); - ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); ifs.close(); - ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); @@ -416,12 +689,14 @@ TEST(ProbabilistTests, TestBootstrap) std::vector<xt::xarray<double>> metrics_bts = evalhyd::evalp( - xt::view(observed, xt::newaxis(), xt::all()), - xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, thresholds, - {}, // t_msk - {}, // m_cdt + "high", // events + confidence_levels, + xt::xtensor<bool, 4>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap, datetimes ); @@ -439,19 +714,237 @@ TEST(ProbabilistTests, TestBootstrap) std::vector<xt::xarray<double>> metrics_rep = evalhyd::evalp( - xt::view(observed_x3, xt::newaxis(), xt::all()), - xt::view(predicted_x3, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), - metrics, - thresholds + xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())), + xt::eval(xt::view(predicted_x3, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", + confidence_levels ); // check results are identical - for (int m = 0; m < metrics.size(); m++) + for (std::size_t m = 0; m < all_metrics_p.size(); m++) { + // 
--------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "bts" and "rep", which + // results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + EXPECT_TRUE( - xt::allclose( + xt::all(xt::isclose( metrics_bts[m], metrics_rep[m] - ) - ) << "Failure for (" << metrics[m] << ")"; + )) + ) << "Failure for (" << all_metrics_p[m] << ")"; } -} \ No newline at end of file +} + +TEST(ProbabilistTests, TestBootstrapSummary) +{ + xt::xtensor<double, 2> thresholds = {{ 33.87, 55.67 }}; + std::vector<double> confidence_levels = {30., 80.}; + + // read in data + std::ifstream ifs; + + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); + xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); + ifs.close(); + std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); + + ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv"); + xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); + ifs.close(); + + ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv"); + xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); + ifs.close(); + + // compute metrics via bootstrap + std::unordered_map<std::string, int> bootstrap_0 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; + + std::vector<xt::xarray<double>> metrics_raw = + evalhyd::evalp( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", // events + confidence_levels, + xt::xtensor<bool, 4>({}), // t_msk + 
xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_0, + datetimes + ); + + // compute metrics via bootstrap with mean and standard deviation summary + std::unordered_map<std::string, int> bootstrap_1 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 1}}; + + std::vector<xt::xarray<double>> metrics_mas = + evalhyd::evalp( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", // events + confidence_levels, + xt::xtensor<bool, 4>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_1, + datetimes + ); + + // check results are identical + for (std::size_t m = 0; m < all_metrics_p.size(); m++) + { + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "bts" and "rep", which + // results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + + // mean + EXPECT_TRUE( + xt::all(xt::isclose( + xt::mean(metrics_raw[m], {3}), + xt::view(metrics_mas[m], xt::all(), xt::all(), xt::all(), 0) + )) + ) << "Failure for (" << all_metrics_p[m] << ") on mean"; + // standard deviation + EXPECT_TRUE( + xt::all(xt::isclose( + xt::stddev(metrics_raw[m], {3}), + xt::view(metrics_mas[m], xt::all(), xt::all(), xt::all(), 1) + )) + ) << "Failure for (" << all_metrics_p[m] << ") on standard deviation"; + } + + // compute metrics via bootstrap with quantiles summary + std::unordered_map<std::string, int> bootstrap_2 = + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 2}}; + + std::vector<xt::xarray<double>> 
metrics_qtl = + evalhyd::evalp( + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + all_metrics_p, + thresholds, + "high", // events + confidence_levels, + xt::xtensor<bool, 4>({}), // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + bootstrap_2, + datetimes + ); + + // check results are identical + for (std::size_t m = 0; m < all_metrics_p.size(); m++) + { + // --------------------------------------------------------------------- + // /!\ skip ranks-based metrics because it contains a random process + // for which setting the seed will not work because the time series + // lengths are different between "bts" and "rep", which + // results in different tensor shapes, and hence in different + // random ranks for ties + if ((all_metrics_p[m] == "RANK_HIST") + || (all_metrics_p[m] == "DS") + || (all_metrics_p[m] == "AS")) + { + continue; + } + // --------------------------------------------------------------------- + + // quantiles + std::vector<double> quantiles = {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}; + std::size_t i = 0; + + for (auto q : quantiles) + { + EXPECT_TRUE( + xt::all(xt::isclose( + xt::quantile(metrics_raw[m], {q}, 3), + xt::view(metrics_qtl[m], xt::all(), xt::all(), xt::all(), i) + )) + ) << "Failure for (" << all_metrics_p[m] << ") on quantile " << q; + i++; + } + } +} + +TEST(ProbabilistTests, TestCompleteness) +{ + std::vector<std::string> diags = {"completeness"}; + + // compute metrics on series with NaN + xt::xtensor<double, 4> prd = {{ + // leadtime 1 + {{ 5.3, NAN, 5.7, 2.3, 3.3, NAN }, + { 4.3, NAN, 4.7, 4.3, 3.4, NAN }, + { 5.3, NAN, 5.7, 2.3, 3.8, NAN }}, + // leadtime 2 + {{ NAN, 4.2, 5.7, 2.3, 3.1, 4.1 }, + { NAN, 4.2, 4.7, 4.3, 3.3, 2.8 }, + { NAN, 5.2, 5.7, 2.3, 3.9, 3.5 }} + }}; + + xt::xtensor<double, 2> obs = + {{ 4.7, 4.3, NAN, 2.7, 4.1, 5.0 }}; + + xt::xtensor<bool, 4> msk = {{ + // leadtime 1 + {{ true, true, true, false, 
true, true }, + { true, true, true, true, true, true }}, + // leadtime 2 + {{ true, true, true, true, true, false }, + { true, true, true, true, true, true }}, + }}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + obs, + prd, + std::vector<std::string> {}, // metrics + xt::xtensor<double, 2>({}), // thresholds + xtl::missing<const std::string>(), // events + {}, + msk, // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + xtl::missing<const std::unordered_map<std::string, int>>(), // bootstrap + {}, // dts + xtl::missing<const int>(), // seed + diags + ); + + // check that numerical results are identical + xt::xtensor<double, 4> expected = {{ + // leadtime 1 + {{ 2. }, + { 3. }}, + // leadtime 2 + {{ 3. }, + { 4. }}, + }}; + + EXPECT_TRUE( + xt::all(xt::isclose(results[0], expected, 1e-05, 1e-08, true)) + ); +} + diff --git a/tests/test_uncertainty.cpp b/tests/test_uncertainty.cpp index 1bfb312240310b6ec5fe3849b65127644394c9e6..5889ad36f912baba0cfb409d09a54a25d6dcd7e7 100644 --- a/tests/test_uncertainty.cpp +++ b/tests/test_uncertainty.cpp @@ -1,11 +1,14 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. 
+ #include <unordered_map> #include <gtest/gtest.h> #include <xtensor/xtensor.hpp> #include <xtensor/xrandom.hpp> -#include <xtensor/xio.hpp> -#include "uncertainty.hpp" +#include "detail/uncertainty.hpp" TEST(UncertaintyTests, TestBootstrapGenerator) { @@ -16,45 +19,55 @@ TEST(UncertaintyTests, TestBootstrapGenerator) "2013-10-01 00:00:00", "2013-10-02 00:00:00", "2013-10-03 00:00:00", "2013-10-04 00:00:00", "2013-10-05 00:00:00", "2013-10-06 00:00:00", "2013-10-07 00:00:00", "2013-10-08 00:00:00", "2013-10-09 00:00:00", "2013-10-10 00:00:00", "2013-10-11 00:00:00", "2013-10-12 00:00:00", "2013-10-13 00:00:00", "2013-10-14 00:00:00", "2013-10-15 00:00:00", "2013-10-16 00:00:00", "2013-10-17 00:00:00", "2013-10-18 00:00:00", "2013-10-19 00:00:00", "2013-10-20 00:00:00", "2013-10-21 00:00:00", "2013-10-22 00:00:00", "2013-10-23 00:00:00", "2013-10-24 00:00:00", "2013-10-25 00:00:00", "2013-10-26 00:00:00", "2013-10-27 00:00:00", "2013-10-28 00:00:00", "2013-10-29 00:00:00", "2013-10-30 00:00:00", "2013-10-31 00:00:00", "2013-11-01 00:00:00", "2013-11-02 00:00:00", "2013-11-03 00:00:00", "2013-11-04 00:00:00", "2013-11-05 00:00:00", "2013-11-06 00:00:00", "2013-11-07 00:00:00", "2013-11-08 00:00:00", "2013-11-09 00:00:00", "2013-11-10 00:00:00", "2013-11-11 00:00:00", "2013-11-12 00:00:00", "2013-11-13 00:00:00", "2013-11-14 00:00:00", "2013-11-15 00:00:00", "2013-11-16 00:00:00", "2013-11-17 00:00:00", "2013-11-18 00:00:00", "2013-11-19 00:00:00", "2013-11-20 00:00:00", "2013-11-21 00:00:00", "2013-11-22 00:00:00", "2013-11-23 00:00:00", "2013-11-24 00:00:00", "2013-11-25 00:00:00", "2013-11-26 00:00:00", "2013-11-27 00:00:00", "2013-11-28 00:00:00", "2013-11-29 00:00:00", "2013-11-30 00:00:00", "2013-12-01 00:00:00", "2013-12-02 00:00:00", "2013-12-03 00:00:00", "2013-12-04 00:00:00", "2013-12-05 00:00:00", "2013-12-06 00:00:00", "2013-12-07 00:00:00", "2013-12-08 00:00:00", "2013-12-09 00:00:00", "2013-12-10 00:00:00", "2013-12-11 00:00:00", "2013-12-12 00:00:00", 
"2013-12-13 00:00:00", "2013-12-14 00:00:00", "2013-12-15 00:00:00", "2013-12-16 00:00:00", "2013-12-17 00:00:00", "2013-12-18 00:00:00", "2013-12-19 00:00:00", "2013-12-20 00:00:00", "2013-12-21 00:00:00", "2013-12-22 00:00:00", "2013-12-23 00:00:00", "2013-12-24 00:00:00", "2013-12-25 00:00:00", "2013-12-26 00:00:00", "2013-12-27 00:00:00", "2013-12-28 00:00:00", "2013-12-29 00:00:00", "2013-12-30 00:00:00", "2013-12-31 00:00:00", "2014-01-01 00:00:00", "2014-01-02 00:00:00", "2014-01-03 00:00:00", "2014-01-04 00:00:00", "2014-01-05 00:00:00", "2014-01-06 00:00:00", "2014-01-07 00:00:00", "2014-01-08 00:00:00", "2014-01-09 00:00:00", "2014-01-10 00:00:00", "2014-01-11 00:00:00", "2014-01-12 00:00:00", "2014-01-13 00:00:00", "2014-01-14 00:00:00", "2014-01-15 00:00:00", "2014-01-16 00:00:00", "2014-01-17 00:00:00", "2014-01-18 00:00:00", "2014-01-19 00:00:00", "2014-01-20 00:00:00", "2014-01-21 00:00:00", "2014-01-22 00:00:00", "2014-01-23 00:00:00", "2014-01-24 00:00:00", "2014-01-25 00:00:00", "2014-01-26 00:00:00", "2014-01-27 00:00:00", "2014-01-28 00:00:00", "2014-01-29 00:00:00", "2014-01-30 00:00:00", "2014-01-31 00:00:00", "2014-02-01 00:00:00", "2014-02-02 00:00:00", "2014-02-03 00:00:00", "2014-02-04 00:00:00", "2014-02-05 00:00:00", "2014-02-06 00:00:00", "2014-02-07 00:00:00", "2014-02-08 00:00:00", "2014-02-09 00:00:00", "2014-02-10 00:00:00", "2014-02-11 00:00:00", "2014-02-12 00:00:00", "2014-02-13 00:00:00", "2014-02-14 00:00:00", "2014-02-15 00:00:00", "2014-02-16 00:00:00", "2014-02-17 00:00:00", "2014-02-18 00:00:00", "2014-02-19 00:00:00", "2014-02-20 00:00:00", "2014-02-21 00:00:00", "2014-02-22 00:00:00", "2014-02-23 00:00:00", "2014-02-24 00:00:00", "2014-02-25 00:00:00", "2014-02-26 00:00:00", "2014-02-27 00:00:00", "2014-02-28 00:00:00", "2014-03-01 00:00:00", "2014-03-02 00:00:00", "2014-03-03 00:00:00", "2014-03-04 00:00:00", "2014-03-05 00:00:00", "2014-03-06 00:00:00", "2014-03-07 00:00:00", "2014-03-08 00:00:00", "2014-03-09 
00:00:00", "2014-03-10 00:00:00", "2014-03-11 00:00:00", "2014-03-12 00:00:00", "2014-03-13 00:00:00", "2014-03-14 00:00:00", "2014-03-15 00:00:00", "2014-03-16 00:00:00", "2014-03-17 00:00:00", "2014-03-18 00:00:00", "2014-03-19 00:00:00", "2014-03-20 00:00:00", "2014-03-21 00:00:00", "2014-03-22 00:00:00", "2014-03-23 00:00:00", "2014-03-24 00:00:00", "2014-03-25 00:00:00", "2014-03-26 00:00:00", "2014-03-27 00:00:00", "2014-03-28 00:00:00", "2014-03-29 00:00:00", "2014-03-30 00:00:00", "2014-03-31 00:00:00", "2014-04-01 00:00:00", "2014-04-02 00:00:00", "2014-04-03 00:00:00", "2014-04-04 00:00:00", "2014-04-05 00:00:00", "2014-04-06 00:00:00", "2014-04-07 00:00:00", "2014-04-08 00:00:00", "2014-04-09 00:00:00", "2014-04-10 00:00:00", "2014-04-11 00:00:00", "2014-04-12 00:00:00", "2014-04-13 00:00:00", "2014-04-14 00:00:00", "2014-04-15 00:00:00", "2014-04-16 00:00:00", "2014-04-17 00:00:00", "2014-04-18 00:00:00", "2014-04-19 00:00:00", "2014-04-20 00:00:00", "2014-04-21 00:00:00", "2014-04-22 00:00:00", "2014-04-23 00:00:00", "2014-04-24 00:00:00", "2014-04-25 00:00:00", "2014-04-26 00:00:00", "2014-04-27 00:00:00", "2014-04-28 00:00:00", "2014-04-29 00:00:00", "2014-04-30 00:00:00", "2014-05-01 00:00:00", "2014-05-02 00:00:00", "2014-05-03 00:00:00", "2014-05-04 00:00:00", "2014-05-05 00:00:00", "2014-05-06 00:00:00", "2014-05-07 00:00:00", "2014-05-08 00:00:00", "2014-05-09 00:00:00", "2014-05-10 00:00:00", "2014-05-11 00:00:00", "2014-05-12 00:00:00", "2014-05-13 00:00:00", "2014-05-14 00:00:00", "2014-05-15 00:00:00", "2014-05-16 00:00:00", "2014-05-17 00:00:00", "2014-05-18 00:00:00", "2014-05-19 00:00:00", "2014-05-20 00:00:00", "2014-05-21 00:00:00", "2014-05-22 00:00:00", "2014-05-23 00:00:00", "2014-05-24 00:00:00", "2014-05-25 00:00:00", "2014-05-26 00:00:00", "2014-05-27 00:00:00", "2014-05-28 00:00:00", "2014-05-29 00:00:00", "2014-05-30 00:00:00", "2014-05-31 00:00:00", "2014-06-01 00:00:00", "2014-06-02 00:00:00", "2014-06-03 00:00:00", 
"2014-06-04 00:00:00", "2014-06-05 00:00:00", "2014-06-06 00:00:00", "2014-06-07 00:00:00", "2014-06-08 00:00:00", "2014-06-09 00:00:00", "2014-06-10 00:00:00", "2014-06-11 00:00:00", "2014-06-12 00:00:00", "2014-06-13 00:00:00", "2014-06-14 00:00:00", "2014-06-15 00:00:00", "2014-06-16 00:00:00", "2014-06-17 00:00:00", "2014-06-18 00:00:00", "2014-06-19 00:00:00", "2014-06-20 00:00:00", "2014-06-21 00:00:00", "2014-06-22 00:00:00", "2014-06-23 00:00:00", "2014-06-24 00:00:00", "2014-06-25 00:00:00", "2014-06-26 00:00:00", "2014-06-27 00:00:00", "2014-06-28 00:00:00", "2014-06-29 00:00:00", "2014-06-30 00:00:00", "2014-07-01 00:00:00", "2014-07-02 00:00:00", "2014-07-03 00:00:00", "2014-07-04 00:00:00", "2014-07-05 00:00:00", "2014-07-06 00:00:00", "2014-07-07 00:00:00", "2014-07-08 00:00:00", "2014-07-09 00:00:00", "2014-07-10 00:00:00", "2014-07-11 00:00:00", "2014-07-12 00:00:00", "2014-07-13 00:00:00", "2014-07-14 00:00:00", "2014-07-15 00:00:00", "2014-07-16 00:00:00", "2014-07-17 00:00:00", "2014-07-18 00:00:00", "2014-07-19 00:00:00", "2014-07-20 00:00:00", "2014-07-21 00:00:00", "2014-07-22 00:00:00", "2014-07-23 00:00:00", "2014-07-24 00:00:00", "2014-07-25 00:00:00", "2014-07-26 00:00:00", "2014-07-27 00:00:00", "2014-07-28 00:00:00", "2014-07-29 00:00:00", "2014-07-30 00:00:00", "2014-07-31 00:00:00", "2014-08-01 00:00:00", "2014-08-02 00:00:00", "2014-08-03 00:00:00", "2014-08-04 00:00:00", "2014-08-05 00:00:00", "2014-08-06 00:00:00", "2014-08-07 00:00:00", "2014-08-08 00:00:00", "2014-08-09 00:00:00", "2014-08-10 00:00:00", "2014-08-11 00:00:00", "2014-08-12 00:00:00", "2014-08-13 00:00:00", "2014-08-14 00:00:00", "2014-08-15 00:00:00", "2014-08-16 00:00:00", "2014-08-17 00:00:00", "2014-08-18 00:00:00", "2014-08-19 00:00:00", "2014-08-20 00:00:00", "2014-08-21 00:00:00", "2014-08-22 00:00:00", "2014-08-23 00:00:00", "2014-08-24 00:00:00", "2014-08-25 00:00:00", "2014-08-26 00:00:00", "2014-08-27 00:00:00", "2014-08-28 00:00:00", "2014-08-29 
00:00:00", "2014-08-30 00:00:00", "2014-08-31 00:00:00", "2014-09-01 00:00:00", "2014-09-02 00:00:00", "2014-09-03 00:00:00", "2014-09-04 00:00:00", "2014-09-05 00:00:00", "2014-09-06 00:00:00", "2014-09-07 00:00:00", "2014-09-08 00:00:00", "2014-09-09 00:00:00", "2014-09-10 00:00:00", "2014-09-11 00:00:00", "2014-09-12 00:00:00", "2014-09-13 00:00:00", "2014-09-14 00:00:00", "2014-09-15 00:00:00", "2014-09-16 00:00:00", "2014-09-17 00:00:00", "2014-09-18 00:00:00", "2014-09-19 00:00:00", "2014-09-20 00:00:00", "2014-09-21 00:00:00", "2014-09-22 00:00:00", "2014-09-23 00:00:00", "2014-09-24 00:00:00", "2014-09-25 00:00:00", "2014-09-26 00:00:00", "2014-09-27 00:00:00", "2014-09-28 00:00:00", "2014-09-29 00:00:00", "2014-09-30 00:00:00" }; - // fix seed to know which experiment is going to be generated - xt::random::seed(7); - // use bootstrap generator to get samples - auto samples = evalhyd::uncertainty::bootstrap(datetimes, 5, 4); + // experiment (5 samples of 4 year length, set seed to fixed value 7): + auto samples = evalhyd::uncertainty::bootstrap(datetimes, 5, 4, 7); - // result expected from generator + // manually generate results expected from random generator + std::vector<std::vector<int>> results; - // experiment (5 samples of 4 year length): - std::vector<std::vector<int>> results = { - // (2012, 2012, 2014, 2012) - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 
151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 
206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 
970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 
276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365}, - // (2013, 2014, 2014, 2013) - {366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 
669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, - 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 
1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, - 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 
1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730}, - // 
(2014, 2012, 2013, 2012) - {731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 
430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365}, - // (2013, 2012, 2012, 2013) - {366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 
513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 
203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 
258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 
657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730}, - // (2012, 2012, 2013, 2013) - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 
396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 
430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730} - }; + std::unordered_map<int, std::vector<int>> year_to_indices; + xt::xtensor<int, 1> hy_2012 = xt::arange(0, 366); // 0-365 + year_to_indices[2011] = std::vector<int>(hy_2012.begin(), hy_2012.end()); + xt::xtensor<int, 1> hy_2013 = xt::arange(366, 731); // 366-730 + year_to_indices[2012] = std::vector<int>(hy_2013.begin(), hy_2013.end()); + xt::xtensor<int, 1> hy_2014 = xt::arange(731, 1096); // 731-1095 + year_to_indices[2013] = std::vector<int>(hy_2014.begin(), hy_2014.end()); + + 
std::vector<std::vector<int>> sampled_years; +#if EVALHYD_TESTING_OS == WINDOWS + sampled_years = {{2011, 2012, 2012, 2013}, + {2012, 2011, 2013, 2013}, + {2012, 2013, 2012, 2012}, + {2013, 2011, 2013, 2012}, + {2011, 2012, 2011, 2011}}; +#elif EVALHYD_TESTING_OS == MACOS + sampled_years = {{2011, 2012, 2013, 2011}, + {2012, 2013, 2011, 2012}, + {2013, 2013, 2013, 2011}, + {2013, 2011, 2011, 2011}, + {2011, 2013, 2013, 2011}}; +#elif EVALHYD_TESTING_OS == LINUX + sampled_years = {{2011, 2011, 2013, 2011}, + {2012, 2013, 2013, 2012}, + {2013, 2011, 2012, 2011}, + {2012, 2011, 2011, 2012}, + {2011, 2011, 2012, 2012}}; +#endif + + for (std::size_t s = 0; s < samples.size(); s++) + { + std::vector<int> indices; + for (std::size_t y = 0; y < 4; y++) + { + auto i = year_to_indices[sampled_years[s][y]]; + indices.insert(indices.end(), i.begin(), i.end()); + } + results.push_back(indices); + } // check that sampled indices are as expected by applying them on some data - for (int s = 0; s < samples.size(); s++) + for (std::size_t s = 0; s < samples.size(); s++) { auto data = xt::arange(datetimes.size()); EXPECT_TRUE( @@ -64,4 +77,4 @@ TEST(UncertaintyTests, TestBootstrapGenerator) ) ) << "Failure for ( sample " << s << ")"; } -} \ No newline at end of file +}