LOCUS       HUMCTHD                 2038 bp    mRNA    linear   HUM 08-AUG-1995
DEFINITION  Human cathepsin D mRNA, complete cds.
ACCESSION   M11233
VERSION     M11233.1
KEYWORDS    aspartyl protease; cathepsin; lysosomal endoprotease.
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 2038)
  AUTHORS   Faust,P.L., Kornfeld,S. and Chirgwin,J.M.
  TITLE     Cloning and sequence analysis of cDNA for human cathepsin D
  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 82 (15), 4910-4914 (1985)
   PUBMED   3927292
COMMENT     Original source text: Human kidney, cDNA to mRNA, clone pHKCD45;
            hepatoma cell line G2 (HepG2), clone lambda-HG2CD1.1.
            There are seven more amino acids encoded by the human mRNA than are
            encoded by the porcine mRNA (positions 535-555).  The extra amino
            acids are found at the junction between the light and heavy chains
            of cathepsin.  It is at present not known where the human protein
            is cleaved to form these two chains.
FEATURES             Location/Qualifiers
     source          1..2038
                     /db_xref="H-InvDB:HIT000194107"
                     /organism="Homo sapiens"
                     /mol_type="mRNA"
                     /db_xref="taxon:9606"
                     /map="11p15.5"
     gene            1..2038
                     /gene="CTSD"
     mRNA            <1..2038
                     /gene="CTSD"
                     /product="cth mRNA"
     CDS             52..1290
                     /gene="CTSD"
                     /note="preprocathepsin D"
                     /codon_start=1
                     /protein_id="AAB59529.1"
                     /db_xref="GDB:G00-120-512"
                     /translation="MQPSSLLPLALCLLAAPASALVRIPLHKFTSIRRTMSEVGGSVE
                     DLIAKGPVSKYSQAVPAVTEGPIPEVLKNYMDAQYYGEIGIGTPPQCFTVVFDTGSSN
                     LWVPSIHCKLLDIACWIHHKYNSDKSSTYVKNGTSFDIHYGSGSLSGYLSQDTVSVPC
                     QSASSASALGGVKVERQVFGEATKQPGITFIAAKFDGILGMAYPRISVNNVLPVFDNL
                     MQQKLVDQNIFSFYLSRDPDAQPGGELMLGGTDSKYYKGSLSYLNVTRKAYWQVHLDQ
                     VEVASGLTLCKEGCEAIVDTGTSLMVGPVDEVRELQKAIGAVPLIQGEYMIPCEKVST
                     LPAITLKLGGKGYKLSPEDYTLKVSQAGKTLCLSGFMGMDIPPPSGPLWILGDVFIGR
                     YYTVFDRDNNRVGFAEAARL"
     sig_peptide     52..111
                     /gene="CTSD"
                     /note="cathepsin D signal peptide"
     mat_peptide     244..1287
                     /gene="CTSD"
                     /product="cathepsin D (light and heavy chains; see
                     comment)"
     variation       775
                     /gene="CTSD"
                     /note="g in pHKCD45; a in lambda-HG2CD1.1"
                     /replace="a"
BASE COUNT          356 a          689 c          607 g          386 t
ORIGIN      119 bp upstream of BamHI site.
        1 ggctataagc gcacggcctc ggcgaccctc tccgacccgg ccgccgccgc catgcagccc
       61 tccagccttc tgccgctcgc cctctgcctg ctggctgcac ccgcctccgc gctcgtcagg
      121 atcccgctgc acaagttcac gtccatccgc cggaccatgt cggaggttgg gggctctgtg
      181 gaggacctga ttgccaaagg ccccgtctca aagtactccc aggcggtgcc agccgtgacc
      241 gaggggccca ttcccgaggt gctcaagaac tacatggacg cccagtacta cggggagatt
      301 ggcatcggga cgccccccca gtgcttcaca gtcgtcttcg acacgggctc ctccaacctg
      361 tgggtcccct ccatccactg caaactgctg gacatcgctt gctggatcca ccacaagtac
      421 aacagcgaca agtccagcac ctacgtgaag aatggtacct cgtttgacat ccactatggc
      481 tcgggcagcc tctccgggta cctgagccag gacactgtgt cggtgccctg ccagtcagcg
      541 tcgtcagcct ctgccctggg cggtgtcaaa gtggagaggc aggtctttgg ggaggccacc
      601 aagcagccag gcatcacctt catcgcagcc aagttcgatg gcatcctggg catggcctac
      661 ccccgcatct ccgtcaacaa cgtgctgccc gtcttcgaca acctgatgca gcagaagctg
      721 gtggaccaga acatcttctc cttctacctg agcagggacc cagatgcgca gcctgggggt
      781 gagctgatgc tgggtggcac agactccaag tattacaagg gttctctgtc ctacctgaat
      841 gtcacccgca aggcctactg gcaggtccac ctggaccagg tggaggtggc cagcgggctg
      901 accctgtgca aggagggctg tgaggccatt gtggacacag gcacttccct catggtgggc
      961 ccggtggatg aggtgcgcga gctgcagaag gccatcgggg ccgtgccgct gattcagggc
     1021 gagtacatga tcccctgtga gaaggtgtcc accctgcccg cgatcacact gaagctggga
     1081 ggcaaaggct acaagctgtc cccagaggac tacacgctca aggtgtcgca ggccgggaag
     1141 accctctgcc tgagcggctt catgggcatg gacatcccgc cacccagcgg gccactctgg
     1201 atcctgggcg acgtcttcat cggccgctac tacactgtgt ttgaccgtga caacaacagg
     1261 gtgggcttcg ccgaggctgc ccgcctctag ttcccaaggc gtccgcgcgc cagcacagaa
     1321 acagaggaga gtcccagagc aggaggcccc tggcccagcg gcccctccca cacacaccca
     1381 cacactcgcc cgcccactgt cctgggcgcc ctggaagccg gcggcccaag cccgacttgc
     1441 tgttttgttc tgtggttttc ccctccctgg gttcagaaat gctgcctgcc tgtctgtctc
     1501 tccatctgtt tggtgggggt agagctgatc cagagcacag atctgtttcg tgcattggaa
     1561 gaccccaccc aagcttggca gccgagctcg tgtatcctgg ggctcccttc atctccaggg
     1621 agtcccctcc ccggccctac cagcgcccgc tgggctgagc ccctacccca caccaggccg
     1681 tcctcccggg ccctcccttg gaaacctgcc ctgcctgagg gcccctctgc ccagcttggg
     1741 cccagctggg ctctgccacc ctacctgttc agtgtcccgg gcccgttgag gatgaggccg
     1801 ctagaggcct gaggatgagc tggaaggagt gagaggggac aaaacccacc ttgttggagc
     1861 ctgcagggtg gtgctgggac tgagccagtc ccaggggcat gtattggcct ggaggtgggg
     1921 ttgggattgg gggctggtgc cagccttcct ctgcagctga cctctgttgt cctccccttg
     1981 ggcggctgag agccccagct gacatggaaa tacagttgtt ggcctccggc ctcccctc
//