
// Simple library for bioinformatics tasks

/// Sequence utilities

/**
 * A single titled biological sequence (nucleotide or amino acid).
 *
 * The sequence is stored upper-cased. Whether it is an amino-acid sequence can
 * either be stated explicitly or guessed from its letter composition.
 */
export class SeqRecord {

    /**
     * @param {string} title - Record name (without any leading ">").
     * @param {string} sequence - Sequence letters; may contain "-" or " " gaps.
     * @param {?boolean} isAA - true/false to force the sequence type, null to guess it.
     */
    constructor(title, sequence, isAA=null) {
        this.title = title
        this.sequence = sequence.toUpperCase()
        this.isAA = isAA
    }

    /**
     * Render the record as FASTA text.
     *
     * @param {?number} width - Maximum letters per sequence line; null keeps the
     *     whole sequence on a single line.
     * @returns {string} The header line followed by the wrapped sequence. An empty
     *     sequence yields just the header line.
     */
    toFasta(width=80) {
        const header = ">  " + this.title + "\n"
        // Nothing to wrap: match() would return null here, and a null width would
        // build the invalid pattern ".{1,0}", so return the bare header instead.
        if (this.sequence.length === 0) {
            return header
        }
        if (width === null) {
            width = this.sequence.length
        }
        const lines = this.sequence.match(new RegExp('.{1,' + width + '}', 'g')) || []
        return header + lines.join("\n")
    }

    // No toClustal since it's meant for multiple sequences

    /**
     * Report whether this record holds amino acids.
     *
     * Uses the explicit flag given to the constructor when there is one; otherwise
     * guesses from the share of A/T/C/G/U among the non-gap letters.
     *
     * @returns {boolean} true when the sequence is (assumed to be) amino acids.
     */
    isAminoAcidSeq() {
        if (this.isAA !== null) {
            return this.isAA // Use the specified info if available
        }

        let nucleotideLetters = 0  // Technically we are counting RNA letters since we are allowing U
        let totalNonGappedCount = 0
        for (const letter of this.sequence) {
            if (letter !== '-' && letter !== ' ') {
                totalNonGappedCount++
            }
            if (letter === 'A' || letter === 'T' || letter === 'C' || letter === 'G' || letter === 'U') {
                nucleotideLetters++
            }
        }
        // 24 possible amino acid letters, if we naively assume they have an even distribution then
        // A + T + C + G + U should appear 5/24 times (~21%). So if these appear more often than that
        // we could assume it's a nucleotide sequence. But let's be conservative and require ~80% composition.
        // With no non-gap letters the ratio is NaN and the comparison is false.
        return nucleotideLetters / totalNonGappedCount < 0.8
    }

    /**
     * @returns {number} Number of characters in the stored sequence (gaps included).
     */
    length() {
        return this.sequence.length
    }
}

/**
 * An ordered set of sequence records viewed as a (possibly ragged) alignment.
 *
 * On construction the records are transposed into columns (short records are
 * padded with "-") and per-column letter frequencies are precomputed so that
 * colour schemes can look them up cheaply.
 */
export class SeqRecordCollection {

    /**
     * @param {?Array} records - Record objects exposing `title`, `sequence`,
     *     `length()`, `toFasta(width)` and `isAminoAcidSeq()`; null means empty.
     * @param {?boolean} areAAs - true/false to force the collection type, null to
     *     derive it from the records.
     */
    constructor(records=null, areAAs=null) {
        this.records = records === null ? [] : records
        this.areAAs = areAAs
        this.recordLength = this._recordLength()
        this.recordsAsColumns = this._getColumns()  // Compile the columns for efficiency
        this.columnFrequencies = this._getColumnClustalFrequencies() // Compile frequencies for efficiency
    }

    /**
     * @returns {number} Number of records in the collection.
     */
    length() {
        return this.records.length
    }

    /**
     * @returns {number} Length of the longest record (0 for an empty collection).
     */
    _recordLength() {
        let max = 0
        for (const rec of this.records) {
            if (rec.length() > max) {
                max = rec.length()
            }
        }
        return max
    }

    /**
     * @param {?number} width - Line width forwarded to each record's toFasta().
     * @returns {string} All records as FASTA, joined by newlines.
     */
    toFasta(width=80) {
        return this.records.map((rec) => rec.toFasta(width)).join("\n")
    }

    /**
     * Render the collection in CLUSTAL W format, in blocks of 60 columns.
     *
     * @returns {string} CLUSTAL text; the optional conservation line is omitted.
     */
    toClustal() {
        // https://meme-suite.org/meme/doc/clustalw-format.html  Note: conservation is optional so is ignored
        let clustal = "CLUSTAL W Dynamically Generated CLUSTALW format MSA\n\n"

        let seqChunks = []  // List of lists (inner list = sequences in chunk)
        let maxRecordNameLen = 0

        for (let i = 0; i < this.records.length; i++) {
            if (this.records[i].title.length > maxRecordNameLen) {
                maxRecordNameLen = this.records[i].title.length
            }
        }

        let padNameTo = maxRecordNameLen + 5

        // Rebuild row strings from the (gap padded) columns, 60 columns per chunk
        for (let i = 0; i < this.recordsAsColumns.length; i++) {
            let col = this.recordsAsColumns[i]
            if (i % 60 === 0) {  // New chunk (chunks are 60 long)
                seqChunks.push([])
            }
            let chunkIndex = Math.trunc(i / 60)
            for (let j = 0; j < col.length; j++) {
                if (j >= seqChunks[chunkIndex].length) {
                    seqChunks[chunkIndex].push(col[j])
                } else {
                    seqChunks[chunkIndex][j] += col[j]
                }
            }
        }

        for (let i = 0; i < seqChunks.length; i++) {
            let chunk = seqChunks[i]
            for (let j = 0; j < chunk.length; j++) {
                let title = this.records[j].title
                clustal += title
                clustal += " ".repeat(padNameTo - title.length)
                clustal += chunk[j]
                clustal += " "
                // Running count of columns emitted so far for this row
                clustal += ((i * 60) + chunk[j].length).toString()
                clustal += "\n"
            }
            clustal += "\n"
        }

        return clustal
    }

    /**
     * Render the collection as interleaved PHYLIP, 20 columns per block.
     *
     * Titles are cut or space padded to exactly 10 characters. Records shorter
     * than the longest one are padded with "-" so that every row has the length
     * announced in the header line.
     *
     * @returns {string} PHYLIP text.
     */
    toPhylip() {
        // http://rosalind.info/glossary/phylip-format/
        // https://evolution.gs.washington.edu/phylip/doc/main.html#inputfiles
        let phylip = `${this.records.length} ${this.recordLength}\n`
        const chunks = Math.ceil(this.recordLength / 20)
        // Same gap padding the column view applies, so rows match the header length
        const rows = this.records.map((rec) => rec.sequence.padEnd(this.recordLength, "-"))

        // First block: name column followed by the first 20 letters
        for (let i = 0; i < this.records.length; i++) {
            phylip += this.records[i].title.substring(0, 10).padEnd(10, " ")
            phylip += rows[i].substring(0, 20)
            phylip += "\n"
        }

        // Remaining blocks carry sequence data only
        for (let j = 1; j < chunks; j++) {
            for (let i = 0; i < this.records.length; i++) {
                phylip += rows[i].substring(j*20, (j*20)+20)
                phylip += "\n"
            }
        }

        return phylip
    }

    /**
     * Report whether the collection holds amino-acid sequences.
     *
     * Uses the explicit flag given to the constructor when there is one;
     * otherwise requires strictly more than half of the records to look like
     * amino-acid sequences.
     *
     * @returns {boolean} true when the collection is (assumed to be) amino acids;
     *     false for an empty collection.
     */
    isAminoAcidCollection() {
        if (this.areAAs !== null) {
            return this.areAAs
        }

        let aaCount = 0
        for (const rec of this.records) {
            if (rec.isAminoAcidSeq()) {
                aaCount++
            }
        }
        // Compare against half of the record count, not against the constant 0.5
        return aaCount > this.length() / 2
    }

    /**
     * Build column i straight from the records.
     *
     * @param {number} i - Zero-based column index.
     * @returns {string[]} One letter per record, "-" where a record is too short.
     */
    _getColumn(i) {
        let col = []
        for (let j = 0; j < this.records.length; j++) {
            if (i < this.records[j].length()) {
                col.push(this.records[j].sequence[i])
            } else {
                col.push("-")  // Auto insert gaps to keep length consistent
            }
        }

        return col
    }

    /**
     * @param {number} i - Zero-based column index.
     * @returns {string[]|undefined} The precomputed column, undefined when out of range.
     */
    getColumn(i) {
        return this.recordsAsColumns[i]
    }

    /**
     * @returns {string[][]} Every column, in order, built with _getColumn().
     */
    _getColumns() {
        let cols = []
        let numCols = this.recordLength
        for (let i = 0; i < numCols; i++) {
            cols.push(this._getColumn(i))
        }
        return cols
    }

    /**
     * @returns {string[][]} The precomputed columns.
     */
    getColumns() {
        return this.recordsAsColumns
    }

    /**
     * Precompute, for every column, the fraction of non-gap letters that is each
     * individual letter and each Clustal residue group.
     *
     * @returns {Object[]} One object per column mapping a letter or a group key
     *     ('WLVIMAFCHP', 'KR', 'QE', 'ED', 'TS') to its frequency. Letters absent
     *     from a column have no entry. Columns made only of gaps report 0 for
     *     every group.
     */
    _getColumnClustalFrequencies() {
        // This is precompiled to speed up clustal highlighting
        let freqs = []
        for (let i = 0; i < this.recordsAsColumns.length; i++) {
            let col = this.recordsAsColumns[i]
            let nonGappedCount = 0
            // Add clustal specific motifs for amino acids
            let counts = {
                'WLVIMAFCHP': 0,
                'KR': 0,
                'QE': 0,
                'ED': 0,
                'TS': 0
            }
            for (let j = 0; j < col.length; j++) {
                if (col[j] !== '-') {
                    nonGappedCount++
                    if (col[j] in counts) {
                        counts[col[j]]++
                    } else {
                        counts[col[j]] = 1
                    }
                    switch(col[j]) {
                        case 'W':
                        case 'L':
                        case 'V':
                        case 'I':
                        case 'M':
                        case 'A':
                        case 'F':
                        case 'C':
                        case 'H':
                        case 'P':
                            counts['WLVIMAFCHP']++
                            break
                        case 'E':  // E belongs to two groups
                            counts['QE']++
                            counts['ED']++
                            break
                        case 'K':
                        case 'R':
                            counts['KR']++
                            break
                        case 'Q':
                            counts['QE']++
                            break
                        case 'D':
                            counts['ED']++
                            break
                        case 'T':
                        case 'S':
                            counts['TS']++
                            break
                    }
                }
            }
            // Skip the division for all-gap columns so they hold 0 rather than NaN
            if (nonGappedCount > 0) {
                for (const k of Object.keys(counts)) {
                    counts[k] /= nonGappedCount
                }
            }
            freqs.push(counts)
        }
        return freqs
    }
}

/**
 * Parse FASTA text into sequence records.
 *
 * Blank lines are skipped and multi-line sequences are concatenated. A single
 * trailing "*" (sequence terminator) is stripped. Any text before the first
 * ">" header belongs to no record and is ignored.
 *
 * @param {string} sequencesText - FASTA formatted text.
 * @returns {SeqRecord[]} Records in file order.
 * @throws {Error} When a header is not followed by any sequence line.
 */
function fastaParser(sequencesText) {
    const records = []
    let currTitle = null
    let currSequence = null

    // Validate and store the record collected so far
    const pushCurrent = () => {
        if (currSequence === null) {
            throw new Error("Empty record for " + currTitle)
        }
        if (currSequence.endsWith("*")) {  // Technically fastas can end in * so strip it
            currSequence = currSequence.substring(0, currSequence.length - 1)
        }
        records.push(new SeqRecord(currTitle, currSequence))
    }

    for (const rawLine of sequencesText.trim().split("\n")) {
        const line = rawLine.trim()
        if (line.length === 0) {
            continue
        }
        if (line.startsWith(">")) { // Title
            if (currTitle !== null) {
                pushCurrent()
            }
            currTitle = line.substring(1).trim()
            // Also drops any header-less text seen before the first title
            currSequence = null
        } else if (currSequence === null) { // First sequence line
            currSequence = line
        } else { // Continuation line
            currSequence += line
        }
    }

    if (currTitle !== null) {
        pushCurrent()
    }
    return records
}

/**
 * Parse CLUSTAL formatted alignment text into sequence records.
 *
 * The "CLUSTAL ..." header, blank lines and conservation lines (starting with
 * "*", ":" or ".") are skipped. Every other line is read as
 * "<title> <sequence chunk> [count]"; chunks sharing a title are concatenated.
 * Lines that carry a title but no sequence chunk are ignored.
 *
 * @param {string} sequencesText - CLUSTAL formatted text.
 * @returns {SeqRecord[]} Records in order of first appearance.
 */
function clustalParser(sequencesText) {
    // A Map keeps first-appearance order even for numeric titles and cannot
    // collide with inherited object keys such as "constructor".
    const chunksByTitle = new Map()

    for (const rawLine of sequencesText.trim().split("\n")) {
        const line = rawLine.trim()
        if (line.length === 0) {
            continue
        }
        if (line.toUpperCase().startsWith("CLUSTAL")) {
            continue
        }
        if (line[0] === '*' || line[0] === ':' || line[0] === '.') {
            continue // Ignore conservation
        }

        // Any third field (running residue count) is not needed
        const [title, chunk] = line.split(/[ \t]+/)
        if (chunk === undefined) {
            continue // Title without sequence data
        }
        chunksByTitle.set(title, (chunksByTitle.get(title) || "") + chunk)
    }

    return Array.from(chunksByTitle, ([title, sequence]) => new SeqRecord(title, sequence))
}

// Lookup from a lower-case format name / file extension to its parser function.
// PHYLIP ("phy", "ph", "phylip") has no parser yet, so those names are absent.
const filetype2parser = {
    // FASTA and its common extensions
    fa: fastaParser,
    faa: fastaParser,
    fna: fastaParser,
    fas: fastaParser,
    fasta: fastaParser,
    // CLUSTAL alignments
    clustal: clustalParser,
    aln: clustalParser
}

/**
 * Guess the format of sequence text from how it begins.
 *
 * Leading whitespace is ignored. Text starting with ">" is taken as FASTA and
 * text starting with "CLUSTAL" (any letter case) as CLUSTAL. PHYLIP detection
 * is not implemented.
 *
 * @param {string} sequencesText - Raw sequence text.
 * @returns {?string} "fasta", "clustal", or null when nothing matches.
 */
function tryDetectFormat(sequencesText) {
    const trimmed = sequencesText.trim()
    if (trimmed.startsWith(">")) {
        return "fasta"
    }

    const upperCased = sequencesText.toUpperCase().trim()
    return upperCased.startsWith("CLUSTAL") ? "clustal" : null
}

/**
 * Parse sequence text into a SeqRecordCollection.
 *
 * @param {string} sequences - Raw sequence text.
 * @param {?string} hint - Format name or file extension (case-insensitive) that
 *     selects the parser; null detects the format from the text itself.
 * @returns {SeqRecordCollection} The parsed records.
 * @throws {Error} When the hint is not a known format or detection fails.
 */
export function parseSequences(sequences, hint=null) {
    const format = hint === null ? tryDetectFormat(sequences) : hint.toLowerCase()
    // Own-property check so names inherited from Object.prototype (for example
    // "constructor") are never mistaken for a parser.
    const isKnown = format !== null && Object.prototype.hasOwnProperty.call(filetype2parser, format)
    if (!isKnown) {
        throw new Error("Unable to parse sequence text (hint: " + hint + "): " + sequences)
    }
    const parser = filetype2parser[format]
    return new SeqRecordCollection(parser(sequences))
}

// Color Schemes
/**
 * Background/text colour pair used to render one alignment letter.
 */
export class LetterColoring {
    /**
     * @param {?string} backgroundColor - CSS colour value; null means "white".
     * @param {?string} textColor - CSS colour value; null means "black".
     */
    constructor(backgroundColor = null, textColor = null) {
        this.backgroundColor = backgroundColor === null ? "white" : backgroundColor
        this.textColor = textColor === null ? "black" : textColor
    }

    /**
     * @returns {string} Inline CSS declarations for the two colours.
     */
    toStyleCss() {
        return "background-color: " + this.backgroundColor + "; color: " + this.textColor + ";"
    }

    /**
     * Misspelled original name, kept so existing callers keep working.
     *
     * @returns {string} Same value as toStyleCss().
     */
    toSyleCss() {
        return this.toStyleCss()
    }
}

// Background colour per nucleotide letter for the Clustal scheme. Only the first
// LetterColoring argument is given, so the text colour falls back to its default.
// T and U share one colour.
const CLUSTAL_NUCLEOTIDE = {
    'A': new LetterColoring('#64F73F'),
    'C': new LetterColoring('#FFB340'),
    'G': new LetterColoring('#EB413C'),
    'T': new LetterColoring('#3C88EE'),
    'U': new LetterColoring('#3C88EE')
}
// Named palette (background colours only) that ClustalAdaptiveColorScheme picks
// from when colouring amino acids.
const CLUSTAL_COLORS = {
    'BLUE': new LetterColoring('#80a0f0'),
    'PINK': new LetterColoring('#f08080'),
    'RED': new LetterColoring('#f01505'),
    'MAGENTA': new LetterColoring('#c048c0'),
    'GREEN': new LetterColoring('#15c015'),
    'ORANGE': new LetterColoring('#f09048'),
    'YELLOW': new LetterColoring('#c0c000'),
    'CYAN': new LetterColoring('#15a4a4')
}

/**
 * Base colour scheme: gives every position the default letter colouring.
 * Concrete schemes override colorPosition().
 */
class ColorScheme {

    /**
     * Pick the colouring for one letter of an alignment.
     *
     * @param {SeqRecordCollection} collection - Alignment the letter belongs to (unused here).
     * @param {string} character - The letter to colour (unused here).
     * @param {boolean} isAA - Whether the alignment is amino acids (unused here).
     * @param {number} columnIndex - Column of the letter (unused here).
     * @returns {LetterColoring} Always DEFAULT_LETTER_COLORING.
     */
    // eslint-disable-next-line no-unused-vars
    colorPosition(collection, character, isAA, columnIndex) {
        const fallback = DEFAULT_LETTER_COLORING
        return fallback
    }
}

/**
 * Clustal colour scheme: nucleotides get a fixed colour per letter, amino acids
 * are coloured only when the residue composition of their column passes the
 * thresholds of the Clustal rules.
 */
class ClustalAdaptiveColorScheme extends ColorScheme {

    /**
     * @param {SeqRecordCollection} collection - Alignment providing `columnFrequencies`.
     * @param {string} character - Letter to colour.
     * @param {boolean} isAA - Selects the amino-acid or the nucleotide rules.
     * @param {number} columnIndex - Column of the letter.
     * @returns {LetterColoring} Colouring for the letter.
     */
    colorPosition(collection, character, isAA, columnIndex) {
        if (isAA) {
            return this.aaColorPosition(collection, character, columnIndex)
        }
        return this.nonAaColorPosition(collection, character, columnIndex)
    }

    /**
     * @param {SeqRecordCollection} collection - Alignment providing `columnFrequencies`.
     * @param {number} columnIndex - Column to look at.
     * @param {string} searchChars - A single letter or a group key such as 'KR'.
     * @returns {number|undefined} Precomputed frequency; undefined when the letter
     *     does not occur in the column.
     */
    _getFrequency(collection, columnIndex, searchChars) {
        return collection.columnFrequencies[columnIndex][searchChars]
    }

    /**
     * Colour an amino-acid letter according to the Clustal rules.
     *
     * Based on http://www.jalview.org/help/html/colourSchemes/clustal.html
     *
     * @param {SeqRecordCollection} collection - Alignment providing `columnFrequencies`.
     * @param {string} character - Upper-case residue letter.
     * @param {number} columnIndex - Column of the letter.
     * @returns {LetterColoring} A CLUSTAL_COLORS entry, or DEFAULT_LETTER_COLORING
     *     when no rule applies.
     */
    aaColorPosition(collection, character, columnIndex) {
        // True when any of the given letters/groups is more frequent than the
        // threshold in this column. Missing entries are undefined and never pass.
        const anyAbove = (threshold, ...keys) =>
            keys.some((key) => this._getFrequency(collection, columnIndex, key) > threshold)

        switch (character) {
            // Hydrophobic
            case 'A':
            case 'I':
            case 'L':
            case 'M':
            case 'F':
            case 'W':
            case 'V':
                if (anyAbove(0.6, 'WLVIMAFCHP')) {
                    return CLUSTAL_COLORS['BLUE']
                }
                break
            case 'C':
                // Cysteines: test this rule first. C is itself part of the
                // hydrophobic group, so a >85% C column always passes the >60%
                // group rule and the pink branch could otherwise never be reached.
                if (anyAbove(0.85, 'C')) {
                    return CLUSTAL_COLORS['PINK']
                }
                if (anyAbove(0.6, 'WLVIMAFCHP')) {
                    return CLUSTAL_COLORS['BLUE']
                }
                break
            // Positive charge
            case 'K':
            case 'R':
                if (anyAbove(0.6, 'KR') || anyAbove(0.8, 'K', 'R', 'Q')) {
                    return CLUSTAL_COLORS['RED']
                }
                break
            // Negative charge
            case 'E':
                if (anyAbove(0.6, 'KR') || anyAbove(0.5, 'QE') || anyAbove(0.85, 'E', 'Q', 'D')) {
                    return CLUSTAL_COLORS['MAGENTA']
                }
                break
            case 'D':
                if (anyAbove(0.6, 'KR') || anyAbove(0.85, 'K', 'R', 'Q') || anyAbove(0.5, 'ED')) {
                    return CLUSTAL_COLORS['MAGENTA']
                }
                break
            // Polar
            case 'N':
                // The >85% N rule is implied by the >50% N rule
                if (anyAbove(0.5, 'N') || anyAbove(0.85, 'Y')) {
                    return CLUSTAL_COLORS['GREEN']
                }
                break
            case 'Q':
                if (anyAbove(0.6, 'KR') || anyAbove(0.5, 'QE') || anyAbove(0.85, 'Q', 'E', 'K', 'R')) {
                    return CLUSTAL_COLORS['GREEN']
                }
                break
            case 'S':
            case 'T':
                if (anyAbove(0.6, 'WLVIMAFCHP') || anyAbove(0.5, 'TS') || anyAbove(0.85, 'S', 'T')) {
                    return CLUSTAL_COLORS['GREEN']
                }
                break
            // Glycines
            case 'G':
                return CLUSTAL_COLORS['ORANGE']
            // Prolines
            case 'P':
                return CLUSTAL_COLORS['YELLOW']
            // Aromatic
            case 'H':
            case 'Y':
                if (anyAbove(0.6, 'WLVIMAFCHP')
                    || anyAbove(0.85, 'W', 'Y', 'A', 'C', 'P', 'Q', 'F', 'H', 'I', 'L', 'M', 'V')) {
                    return CLUSTAL_COLORS['CYAN']
                }
                break
        }
        return DEFAULT_LETTER_COLORING
    }

    /**
     * Colour a nucleotide letter with its fixed Clustal colour.
     *
     * Based on table in http://www.jalview.org/help/html/colourSchemes/
     *
     * @param {SeqRecordCollection} collection - Unused.
     * @param {string} character - Upper-case nucleotide letter.
     * @param {number} columnIndex - Unused.
     * @returns {LetterColoring} The CLUSTAL_NUCLEOTIDE entry, or
     *     DEFAULT_LETTER_COLORING for any other letter.
     */
    // eslint-disable-next-line no-unused-vars
    nonAaColorPosition(collection, character, columnIndex) {
        return CLUSTAL_NUCLEOTIDE[character] || DEFAULT_LETTER_COLORING
    }
}

// Per-letter colourings for nucleotides in the AliView scheme, including the
// ambiguity codes, the gap character, and a 'DEFAULT' entry used for any letter
// not listed. Arguments are (backgroundColor, textColor).
// NOTE(review): several entries pair a dark background with a mid-tone text
// colour (e.g. 'G' is #000000 on rgb(90,90,90)) — confirm the argument order
// against the AliView source.
const ALIVEW_NUCLEOTIDE = {
    'A': new LetterColoring('#018001', "rgb(90,220,90)"),
    'C': new LetterColoring('#0101FF', "rgb(100,100,250)"),
    'G': new LetterColoring('#000000', "rgb(90,90,90)"),
    'T': new LetterColoring('#FF0101', "rgb(245,130,130)"),
    'U': new LetterColoring('#FF0101', "rgb(245,130,130)"),
    'R': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'Y': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'M': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'K': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'W': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'S': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'B': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'D': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'H': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'V': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    'N': new LetterColoring('#FF00FF', "rgb(255, 255, 255)"),
    '-': new LetterColoring('#868686', "rgb(250, 250, 250)"),
    'DEFAULT': new LetterColoring('#00FFFF', "rgb(255, 255, 255)")
}
// Per-letter colourings for amino acids in the AliView scheme, plus 'X' and the
// gap character. Arguments are (backgroundColor, textColor). Letters not listed
// here have no entry.
const ALIVIEW_AA = {
    'A': new LetterColoring("#276eb7", "rgb(0, 0, 0)"),
    'C': new LetterColoring("#e68080", "rgb(0, 0, 0)"),
    'D': new LetterColoring("#cc4dcc", "rgb(0, 0, 0)"),
    'E': new LetterColoring("#984097", "rgb(0, 0, 0)"),
    'F': new LetterColoring("#1980e6", "rgb(0, 0, 0)"),
    'G': new LetterColoring("#e6994d", "rgb(0, 0, 0)"),
    'H': new LetterColoring("#19b3b3", "rgb(0, 0, 0)"),
    'I': new LetterColoring("#4ea0f3", "rgb(0, 0, 0)"),
    'K': new LetterColoring("#e63319", "rgb(0, 0, 0)"),
    'L': new LetterColoring("#78a6d5", "rgb(0, 0, 0)"),
    'M': new LetterColoring("#0f549b", "rgb(0, 0, 0)"),
    'N': new LetterColoring("#19cc19", "rgb(0, 0, 0)"),
    'P': new LetterColoring("#cccc00", "rgb(0, 0, 0)"),
    'Q': new LetterColoring("#5ced5c", "rgb(0, 0, 0)"),
    'R': new LetterColoring("#f6442c", "rgb(0, 0, 0)"),
    'S': new LetterColoring("#029602", "rgb(0, 0, 0)"),
    'T': new LetterColoring("#45c945", "rgb(0, 0, 0)"),
    'V': new LetterColoring("#047df9", "rgb(0, 0, 0)"),
    'W': new LetterColoring("#0355a9", "rgb(0, 0, 0)"),
    'Y': new LetterColoring("#14c6c8", "rgb(0, 0, 0)"),
    'X': new LetterColoring("rgb(255, 255, 255)", "rgb(0, 255, 255)"),
    '-': new LetterColoring("rgb(230,230,230)", "rgb(64, 64, 64)")
}
/**
 * Colour scheme with one fixed colouring per letter, independent of the column.
 *
 * Based on https://github.com/AliView/AliView/blob/master/src/main/java/aliview/color/DefaultColorScheme.java
 */
class AliviewColorScheme extends ColorScheme {

    /**
     * @param {SeqRecordCollection} collection - Passed through to the per-type lookup.
     * @param {string} character - Letter to colour.
     * @param {boolean} isAA - Selects the amino-acid or the nucleotide table.
     * @param {number} columnIndex - Passed through to the per-type lookup.
     * @returns {LetterColoring} Colouring for the letter.
     */
    colorPosition(collection, character, isAA, columnIndex) {
        if (isAA) {
            return this.aaColorPosition(collection, character, columnIndex)
        }
        return this.nonAaColorPosition(collection, character, columnIndex)
    }

    /**
     * @returns {LetterColoring} The ALIVIEW_AA entry for the letter, or
     *     DEFAULT_LETTER_COLORING when the letter has none.
     */
    // eslint-disable-next-line no-unused-vars
    aaColorPosition(collection, character, columnIndex) {
        const coloring = ALIVIEW_AA[character]
        return coloring || DEFAULT_LETTER_COLORING
    }

    /**
     * @returns {LetterColoring} The ALIVEW_NUCLEOTIDE entry for the letter, or its
     *     'DEFAULT' entry when the letter has none.
     */
    // eslint-disable-next-line no-unused-vars
    nonAaColorPosition(collection, character, columnIndex) {
        const coloring = ALIVEW_NUCLEOTIDE[character]
        return coloring || ALIVEW_NUCLEOTIDE['DEFAULT']
    }
}

// Fallback colouring built with no arguments, i.e. LetterColoring's own defaults.
// The scheme classes above only read it when one of their methods is called.
export const DEFAULT_LETTER_COLORING = new LetterColoring()
// Shared scheme instance that colours every position with DEFAULT_LETTER_COLORING.
export const NO_COLOR_SCHEME = new ColorScheme()
// Shared instance of the Clustal scheme.
export const CLUSTAL_ADAPTIVE_COLOR_SCHEME = new ClustalAdaptiveColorScheme()
// Shared instance of the AliView scheme.
export const ALIVIEW_COLOR_SCHEME = new AliviewColorScheme()
