/*
    libmaus2
    Copyright (C) 2009-2014 German Tischler
    Copyright (C) 2011-2014 Genome Research Limited

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#if ! defined(LIBMAUS2_FASTX_FASTASTREAMSET_HPP)
#define LIBMAUS2_FASTX_FASTASTREAMSET_HPP

#include <libmaus2/fastx/FastAStream.hpp>
#include <libmaus2/fastx/SpaceTable.hpp>
#include <libmaus2/util/ToUpperTable.hpp>
#include <libmaus2/digest/md5.hpp>

#include <libmaus2/fastx/RefPathTokenVectorSequence.hpp>
#include <libmaus2/aio/PosixFdOutputStream.hpp>
#include <libmaus2/util/GetFileSize.hpp>
#include <libmaus2/aio/InputStreamFactoryContainer.hpp>

namespace libmaus2
{
	namespace fastx
	{
		struct FastAStreamSet
		{
			::libmaus2::fastx::FastALineParser parser;

			FastAStreamSet(std::istream & in) : parser(in) {}

			bool getNextStream(std::pair<std::string,FastAStream::shared_ptr_type> & P)
			{
				::libmaus2::fastx::FastALineParserLineInfo line;

				if ( ! parser.getNextLine(line) )
					return false;

				if ( line.linetype != ::libmaus2::fastx::FastALineParserLineInfo::libmaus2_fastx_fasta_id_line )
				{
					libmaus2::exception::LibMausException se;
					se.getStream() << "FastAStreamSet::getNextStream(): unexpected line type" << std::endl;
					se.finish();
					throw se;
				}

				std::string const id(line.line,line.line+line.linelen);

				libmaus2::fastx::FastAStream::shared_ptr_type ptr(
					new libmaus2::fastx::FastAStream(parser,64*1024,0));

				P.first = id;
				P.second = ptr;

				return true;
			}

			bool getStream(std::string const & name, FastAStream::shared_ptr_type & S)
			{
				std::pair<std::string,FastAStream::shared_ptr_type> P;

				while ( getNextStream(P) )
				{
					if ( P.first == name )
					{
						S = P.second;
						return true;
					}
					else
					{
						std::istream & in = *(P.second);
						while ( in )
							in.ignore(64*1024);
					}
				}
				return false;
			}

			static std::string getStreamAsString(std::string const & fn, std::string const & name)
			{
				libmaus2::aio::InputStreamInstance ISI(fn);
				FastAStreamSet FASS(ISI);
				return FASS.getStreamAsString(name);
			}

			std::string getStreamAsString(std::string const & name)
			{
				FastAStream::shared_ptr_type S;
				bool const ok = getStream(name,S);

				if ( ok )
				{
					typedef libmaus2::autoarray::AutoArray<char>::shared_ptr_type as;
					std::vector<as> ASV;
					std::vector<uint64_t> ASN;
					uint64_t na = 0;
					std::istream & in = *S;

					while ( in )
					{
						as A(new libmaus2::autoarray::AutoArray<char>(64*1024,false));
						in.read(A->begin(),A->size());
						uint64_t const n = in.gcount();
						ASV.push_back(A);
						ASN.push_back(n);
						na += n;
					}

					std::string s(na,' ');

					uint64_t c = 0;
					for ( uint64_t i = 0; i < ASV.size(); ++i )
					{
						std::copy(
							ASV[i]->begin(),
							ASV[i]->begin()+ASN[i],
							s.begin() + c
						);
						c += ASN[i];
					}

					return s;
				}
				else
				{
					libmaus2::exception::LibMausException lme;
					lme.getStream() << "FastAStreamSet::getStreamAsString: requested sequence " << name << " is not contained in file" << std::endl;
					lme.finish();
					throw lme;
				}
			}

			std::map<std::string,std::string> computeMD5(bool writedata = true, bool verify = true)
			{
				std::pair<std::string,FastAStream::shared_ptr_type> P;
				std::map<std::string,std::string> M;
				libmaus2::autoarray::AutoArray<char> B(64*1024,false);
				unsigned char * u = reinterpret_cast<unsigned char *>(B.begin());
				libmaus2::fastx::SpaceTable const S;
				libmaus2::util::ToUpperTable const T;
				uint8_t digest[libmaus2::util::MD5::digestlength];

				char const * datadir = writedata ? getenv("REF_CACHE") : NULL;
				if ( (!datadir) || (!*datadir) )
					datadir = NULL;
				// do not write data if no location is given
				if ( (!datadir) )
					writedata = false;

				char const * refpath = getenv("REF_PATH");
				if ( (!refpath) || (!*refpath) )
					refpath = NULL;

				RefPathTokenVector refcacheexp(writedata ? std::string(datadir) : std::string());
				RefPathTokenVectorSequence refpathexp(refpath ? std::string(refpath) : std::string());

				while ( getNextStream(P) )
				{
					std::string id = P.first;
					std::istream & str = *(P.second);
					libmaus2::util::MD5 md5;
					md5.init();

					std::ostringstream data;

					// shorten id by cutting off everystring from first white space
					uint64_t z = 0;
					while ( z < id.size() && S.nospacetable[static_cast<unsigned char>(id[z])] )
						++z;
					id = id.substr(0,z);

					while ( str )
					{
						str.read(B.begin(),B.size());
						size_t const n = str.gcount();

						size_t o = 0;
						for ( size_t i = 0; i < n; ++i )
							if ( S.nospacetable[ u[i] ] )
								u[o++] = T.touppertable[u[i]];

						md5.update(reinterpret_cast<uint8_t const *>(u),o);
						if ( writedata )
							data.write(B.begin(),o);
					}

					md5.digest(&digest[0]);
					std::string const sdigest = md5.digestToString(&digest[0]);

					#if 0
					std::cerr << id << "\t" << sdigest << "\t" << refcacheexp.expand(sdigest);
					std::vector<std::string> E = refpathexp.expand(sdigest);
					for ( uint64_t z = 0; z < E.size(); ++z )
						std::cerr << "\t" << E[z];
					std::cerr << std::endl;
					#endif

					M[id] = sdigest;

					if ( writedata )
					{
						std::vector<std::string> E = refpathexp.expand(sdigest);
						E.push_back(refcacheexp.expand(sdigest));

						bool found = false;
						std::string foundfn;

						// check if the data is in the cache
						for ( size_t z = 0; (!found) && z < E.size(); ++z )
						{
							std::string e = E[z];

							if ( e.find("URL=") != std::string::npos && e.find("URL=") == 0 )
								e = e.substr(strlen("URL="));

							if ( libmaus2::aio::InputStreamFactoryContainer::tryOpen(e) )
							{
								found = true;
								foundfn = e;
							}
						}

						// data not found in cache
						if ( !found )
						{
							// fixme: USE LOCKING
							libmaus2::aio::PosixFdOutputStream PFOS(E.back());
							std::string const sdata = data.str();
							PFOS.write(sdata.c_str(),sdata.size());
							PFOS.flush();
							if ( ! PFOS )
							{
								libmaus2::exception::LibMausException lme;
								lme.getStream() << "libmaus2::fastx::FastAStreamSet: computeMD5 failed to write sequence data to file" << E.back() << std::endl;
								lme.finish();
								throw lme;
							}
						}
						else if ( verify )
						{
							libmaus2::aio::InputStream::unique_ptr_type Pin(libmaus2::aio::InputStreamFactoryContainer::constructUnique(foundfn));
							std::istream & PFIS = *Pin;

							libmaus2::util::MD5 checkmd5;
							checkmd5.init();

							while ( PFIS )
							{
								PFIS.read(B.begin(),B.size());
								checkmd5.update(reinterpret_cast<uint8_t const *>(B.begin()),PFIS.gcount());
							}

							checkmd5.digest(&digest[0]);
							std::string const scheckdigest = checkmd5.digestToString(&digest[0]);

							if ( scheckdigest != sdigest )
							{
								libmaus2::exception::LibMausException lme;
								lme.getStream() << "libmaus2::fastx::FastAStreamSet: checksum for file " << foundfn << " is wrong" << std::endl;
								lme.finish();
								throw lme;
							}
						}
					}
				}

				return M;
			}
		};
	}
}
#endif
