LOCUS       Z74615                  6728 bp    mRNA    linear   HUM 07-OCT-2008
DEFINITION  H.sapiens mRNA for prepro-alpha1(I) collagen.
ACCESSION   Z74615
VERSION     Z74615.1
KEYWORDS    alpha1(I)-collagen.
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 36)
  AUTHORS   Chu M., de Wet W., Bernard M., Ramirez F.
  TITLE     Fine structural analysis on the human pro-alpha1(I) collagen gene
  JOURNAL   J Biol Chem 260(4), 2315-2320(1985).
   PUBMED   2857713
REFERENCE   2  (bases 37 to 1536)
  AUTHORS   Tromp G., Kuivaniemi H., Stacey A., Shikata H., Baldwin C.T.,
            Jaenisch R., Prockop D.J.
  TITLE     Structure of a full-length cDNA clone for the prepro alpha 1(I)
            chain of human type I procollagen
  JOURNAL   Biochem. J. 253(3), 919-922(1988).
   PUBMED   3178743
REFERENCE   3  (bases 1537 to 3803)
  AUTHORS   Bernard M.P., Chu M., Myers J.C., Ramirez F., Eikenberry E.F.,
            Prockop D.J.
  TITLE     Nucleotide sequences of complementary deoxyribonucleic acids for
            the pro alpha 1 chain of human type I procollagen. Statistical
            evaluation of structures that are conserved during evolution
  JOURNAL   Biochemistry 22(22), 5213-5223(1983).
   PUBMED   6689127
REFERENCE   4  (bases 3804 to 4481)
  AUTHORS   Makela J.K., Raassina M., Virta A., Vuorio E.
  TITLE     Human pro alpha 1(I) collagen: cDNA sequence for the C-propeptide
            domain
  JOURNAL   Nucleic Acids Res. 16(1), 349-349(1988).
   PUBMED   3340531
REFERENCE   5  (bases 4482 to 6728)
  AUTHORS   Maatta A., Bornstein P., Penttinen R.P.
  TITLE     Highly conserved sequences in the 3'-untranslated region of the
            COL1A1 gene bind cell-specific nuclear proteins
  JOURNAL   FEBS Lett. 279(1), 9-13(1991).
   PUBMED   1995349
REFERENCE   6  (bases 1 to 6728)
  AUTHORS   Dalgleish R.
  JOURNAL   Submitted (01-JUL-1996) to the INSDC. Raymond Dalgleish, Department
            of Genetics, University of Leicester, University Road, Leicester,
            LE1 7RH, United Kingdom
REFERENCE   7  (bases 1 to 6728)
  AUTHORS   Dalgleish R.
  TITLE     The human type I collagen mutation database
  JOURNAL   Nucleic Acids Res. 25(1), 181-187(1997).
   PUBMED   9016532
FEATURES             Location/Qualifiers
     source          1..6728
                     /db_xref="H-InvDB:HIT000328003"
                     /organism="Homo sapiens"
                     /mol_type="mRNA"
                     /db_xref="taxon:9606"
     5'UTR           1..119
     exon            1..222
                     /number=1
     CDS             120..4514
                     /product="prepro-alpha1(I) collagen"
                     /db_xref="GOA:P02452"
                     /db_xref="H-InvDB:HIT000328003.14"
                     /db_xref="HGNC:HGNC:2197"
                     /db_xref="InterPro:IPR000885"
                     /db_xref="InterPro:IPR001007"
                     /db_xref="InterPro:IPR008160"
                     /db_xref="PDB:1Q7D"
                     /db_xref="PDB:2LLP"
                     /db_xref="PDB:3EJH"
                     /db_xref="PDB:3GXE"
                     /db_xref="PDB:5CTD"
                     /db_xref="PDB:5CTI"
                     /db_xref="PDB:5CVA"
                     /db_xref="PDB:5CVB"
                     /db_xref="PDB:5K31"
                     /db_xref="PDB:5OU8"
                     /db_xref="PDB:5OU9"
                     /db_xref="UniProtKB/Swiss-Prot:P02452"
                     /protein_id="CAA98968.1"
                     /translation="MFSFVDLRLLLLLAATALLTHGQEEGQVEGQDEDIPPITCVQNG
                     LRYHDRDVWKPEPCRICVCDNGKVLCDDVICDETKNCPGAEVPEGECCPVCPDGSESP
                     TDQETTGVEGPKGDTGPRGPRGPAGPPGRDGIPGQPGLPGPPGPPGPPGPPGLGGNFA
                     PQLSYGYDEKSTGGISVPGPMGPSGPRGLPGPPGAPGPQGFQGPPGEPGEPGASGPMG
                     PRGPPGPPGKNGDDGEAGKPGRPGERGPPGPQGARGLPGTAGLPGMKGHRGFSGLDGA
                     KGDAGPAGPKGEPGSPGENGAPGQMGPRGLPGERGRPGAPGPAGARGNDGATGAAGPP
                     GPTGPAGPPGFPGAVGAKGEAGPQGPRGSEGPQGVRGEPGPPGPAGAAGPAGNPGADG
                     QPGAKGANGAPGIAGAPGFPGARGPSGPQGPGGPPGPKGNSGEPGAPGSKGDTGAKGE
                     PGPVGVQGPPGPAGEEGKRGARGEPGPTGLPGPPGERGGPGSRGFPGADGVAGPKGPA
                     GERGSPGPAGPKGSPGEAGRPGEAGLPGAKGLTGSPGSPGPDGKTGPPGPAGQDGRPG
                     PPGPPGARGQAGVMGFPGPKGAAGEPGKAGERGVPGPPGAVGPAGKDGEAGAQGPPGP
                     AGPAGERGEQGPAGSPGFQGLPGPAGPPGEAGKPGEQGVPGDLGAPGPSGARGERGFP
                     GERGVQGPPGPAGPRGANGAPGNDGAKGDAGAPGAPGSQGAPGLQGMPGERGAAGLPG
                     PKGDRGDAGPKGADGSPGKDGVRGLTGPIGPPGPAGAPGDKGESGPSGPAGPTGARGA
                     PGDRGEPGPPGPAGFAGPPGADGQPGAKGEPGDAGAKGDAGPPGPAGPAGPPGPIGNV
                     GAPGAKGARGSAGPPGATGFPGAAGRVGPPGPSGNAGPPGPPGPAGKEGGKGPRGETG
                     PAGRPGEVGPPGPPGPAGEKGSPGADGPAGAPGTPGPQGIAGQRGVVGLPGQRGERGF
                     PGLPGPSGEPGKQGPSGASGERGPPGPMGPPGLAGPPGESGREGAPAAEGSPGRDGSP
                     GAKGDRGETGPAGPPGAPGAPGAPGPVGPAGKSGDRGETGPAGPAGPVGPVGARGPAG
                     PQGPRGDKGETGEQGDRGIKGHRGFSGLQGPPGPPGSPGEQGPSGASGPAGPRGPPGS
                     AGAPGKDGLNGLPGPIGPPGPRGRTGDAGPVGPPGPPGPPGPPGPPSAGFDFSFLPQP
                     PQEKAHDGGRYYRADDANVVRDRDLEVDTTLKSLSQQIENIRSPEGSRKNPARTCRDL
                     KMCHSDWKSGEYWIDPNQGCNLDAIKVFCNMETGETCVYPTQPSVAQKNWYISKNPKD
                     KRHVWFGESMTDGFQFEYGGQGSDPADVAIQLTFLRLMSTEASQNITYHCKNSVAYMD
                     QQTGNLKKALLLKGSNEIEIRAEGNSRFTYSVTVDGCTSHTGAWGKTVIEYKTTKSSR
                     LPIIDVAPLDVGAPDQEFGFDVGPVCFL"
     sig_peptide     120..185
     misc_feature    186..602
                     /note="N_propeptide"
     exon            223..417
                     /number=2
     exon            418..452
                     /number=3
     exon            453..488
                     /number=4
     exon            489..590
                     /number=5
     exon            591..662
                     /number=6
     misc_feature    603..653
                     /note="N_telopeptide"
     misc_feature    654..3695
                     /note="triple_helix"
     exon            663..707
                     /number=7
     exon            708..761
                     /number=8
     exon            762..815
                     /number=9
     exon            816..869
                     /number=10
     exon            870..923
                     /number=11
     exon            924..977
                     /number=12
     exon            978..1022
                     /number=13
     exon            1023..1076
                     /number=14
     exon            1077..1121
                     /number=15
     exon            1122..1175
                     /number=16
     exon            1176..1274
                     /number=17
     exon            1275..1319
                     /number=18
     exon            1320..1418
                     /number=19
     exon            1419..1472
                     /number=20
     exon            1473..1580
                     /number=21
     exon            1581..1634
                     /number=22
     exon            1635..1733
                     /number=23
     exon            1734..1787
                     /number=24
     exon            1788..1886
                     /number=25
     exon            1887..1940
                     /number=26
     exon            1941..1994
                     /number=27
     exon            1995..2048
                     /number=28
     exon            2049..2102
                     /number=29
     exon            2103..2147
                     /number=30
     exon            2148..2246
                     /number=31
     exon            2247..2354
                     /number=32
     exon            2355..2462
                     /note="exons 33-34"
     exon            2463..2516
                     /number=35
     exon            2517..2570
                     /number=36
     exon            2571..2678
                     /number=37
     exon            2679..2732
                     /number=38
     exon            2733..2786
                     /number=39
     exon            2787..2948
                     /number=40
     exon            2949..3056
                     /number=41
     exon            3057..3164
                     /number=42
     exon            3165..3218
                     /number=43
     exon            3219..3326
                     /number=44
     exon            3327..3380
                     /number=45
     exon            3381..3488
                     /number=46
     exon            3489..3542
                     /number=47
     exon            3543..3650
                     /number=48
     old_sequence    3569..3571
                     /replace="cnc"
                     /citation=[4]
     exon            3651..3933
                     /number=49
     misc_feature    3696..3773
                     /note="C_telopeptide"
     misc_feature    3774..4511
                     /note="C_propeptide"
     exon            3934..4124
                     /number=50
     exon            4125..4367
                     /number=51
     misc_feature    4212..4220
                     /note="carbohydrate attachment site"
     exon            4368..5921
                     /number=52
     regulatory      4763..4775
                     /regulatory_class="polyA_signal_sequence"
     polyA_site      4798
     regulatory      5891..5896
                     /regulatory_class="polyA_signal_sequence"
     polyA_site      5921
BASE COUNT         1213 a         2144 c         1971 g         1400 t
ORIGIN      
        1 agcagacggg agtttctcct cggggtcgga gcaggaggca cgcggagtgt gaggccacgc
       61 atgagcggac gctaaccccc tccccagcca caaagagtct acatgtctag ggtctagaca
      121 tgttcagctt tgtggacctc cggctcctgc tcctcttagc ggccaccgcc ctcctgacgc
      181 acggccaaga ggaaggccaa gtcgagggcc aagacgaaga catcccacca atcacctgcg
      241 tacagaacgg cctcaggtac catgaccgag acgtgtggaa acccgagccc tgccggatct
      301 gcgtctgcga caacggcaag gtgttgtgcg atgacgtgat ctgtgacgag accaagaact
      361 gccccggcgc cgaagtcccc gagggcgagt gctgtcccgt ctgccccgac ggctcagagt
      421 cacccaccga ccaagaaacc accggcgtcg agggacccaa gggagacact ggcccccgag
      481 gcccaagggg acccgcaggc ccccctggcc gagatggcat ccctggacag cctggacttc
      541 ccggaccccc cggacccccc ggacctcccg gaccccctgg cctcggagga aactttgctc
      601 cccagctgtc ttatggctat gatgagaaat caaccggagg aatttccgtg cctggcccca
      661 tgggtccctc tggtcctcgt ggtctccctg gcccccctgg tgcacctggt ccccaaggct
      721 tccaaggtcc ccctggtgag cctggcgagc ctggagcttc aggtcccatg ggtccccgag
      781 gtcccccagg tccccctgga aagaatggag atgatgggga agctggaaaa cctggtcgtc
      841 ctggtgagcg tgggcctcct gggcctcagg gtgctcgagg attgcccgga acagctggcc
      901 tccctggaat gaagggacac agaggtttca gtggtttgga tggtgccaag ggagatgctg
      961 gtcctgctgg tcctaagggt gagcctggca gccctggtga aaatggagct cctggtcaga
     1021 tgggcccccg tggcctgcct ggtgagagag gtcgccctgg agcccctggc cctgctggtg
     1081 ctcgtggaaa tgatggtgct actggtgctg ccgggccccc tggtcccacc ggccccgctg
     1141 gtcctcctgg cttccctggt gctgttggtg ctaagggtga agctggtccc caagggcccc
     1201 gaggctctga aggtccccag ggtgtgcgtg gtgagcctgg cccccctggc cctgctggtg
     1261 ctgctggccc tgctggaaac cctggtgctg atggacagcc tggtgctaaa ggtgccaatg
     1321 gtgctcctgg tattgctggt gctcctggct tccctggtgc ccgaggcccc tctggacccc
     1381 agggccccgg cggccctcct ggtcccaagg gtaacagcgg tgaacctggt gctcctggca
     1441 gcaaaggaga cactggtgct aagggagagc ctggccctgt tggtgttcaa ggaccccctg
     1501 gccctgctgg agaggaagga aagcgaggag ctcgaggtga acccggaccc actggcctgc
     1561 ccggaccccc tggcgagcgt ggtggacctg gtagccgtgg tttccctggc gcagatggtg
     1621 ttgctggtcc caagggtccc gctggtgaac gtggttctcc tggccccgct ggccccaaag
     1681 gatctcctgg tgaagctggt cgtcccggtg aagctggtct gcctggtgcc aagggtctga
     1741 ctggaagccc tggcagccct ggtcctgatg gcaaaactgg cccccctggt cccgccggtc
     1801 aagatggtcg ccccggaccc ccaggcccac ctggtgcccg tggtcaggct ggtgtgatgg
     1861 gattccctgg acctaaaggt gctgctggag agcccggcaa ggctggagag cgaggtgttc
     1921 ccggaccccc tggcgctgtc ggtcctgctg gcaaagatgg agaggctgga gctcagggac
     1981 cccctggccc tgctggtccc gctggcgaga gaggtgaaca aggccctgct ggctcccccg
     2041 gattccaggg tctccctggt cctgctggtc ctccaggtga agcaggcaaa cctggtgaac
     2101 agggtgttcc tggagacctt ggcgcccctg gcccctctgg agcaagaggc gagagaggtt
     2161 tccctggcga gcgtggtgtg caaggtcccc ctggtcctgc tggaccccga ggggccaacg
     2221 gtgctcccgg caacgatggt gctaagggtg atgctggtgc ccctggagct cccggtagcc
     2281 agggcgcccc tggccttcag ggaatgcctg gtgaacgtgg tgcagctggt cttccagggc
     2341 ctaagggtga cagaggtgat gctggtccca aaggtgctga tggctctcct ggcaaagatg
     2401 gcgtccgtgg tctgaccggc cccattggtc ctcctggccc tgctggtgcc cctggtgaca
     2461 agggtgaaag tggtcccagc ggccctgctg gtcccactgg agctcgtggt gcccccggag
     2521 accgtggtga gcctggtccc cccggccctg ctggctttgc tggcccccct ggtgctgacg
     2581 gccaacctgg tgctaaaggc gaacctggtg atgctggtgc caaaggcgat gctggtcccc
     2641 ctgggcctgc cggacccgct ggaccccctg gccccattgg taatgttggt gctcctggag
     2701 ccaaaggtgc tcgcggcagc gctggtcccc ctggtgctac tggtttccct ggtgctgctg
     2761 gccgagtcgg tcctcctggc ccctctggaa atgctggacc ccctggccct cctggtcctg
     2821 ctggcaaaga aggcggcaaa ggtccccgtg gtgagactgg ccctgctgga cgtcctggtg
     2881 aagttggtcc ccctggtccc cctggccctg ctggcgagaa aggatcccct ggtgctgatg
     2941 gtcctgctgg tgctcctggt actcccgggc ctcaaggtat tgctggacag cgtggtgtgg
     3001 tcggcctgcc tggtcagaga ggagagagag gcttccctgg tcttcctggc ccctctggtg
     3061 aacctggcaa acaaggtccc tctggagcaa gtggtgaacg tggtcccccc ggtcccatgg
     3121 gcccccctgg attggctgga ccccctggtg aatctggacg tgagggggct cctgctgccg
     3181 aaggttcccc tggacgagac ggttctcctg gcgccaaggg tgaccgtggt gagaccggcc
     3241 ccgctggacc ccctggtgct cctggtgctc ctggtgcccc tggccccgtt ggccctgctg
     3301 gcaagagtgg tgatcgtggt gagactggtc ctgctggtcc cgccggtccc gtcggccccg
     3361 tcggcgcccg tggccccgcc ggaccccaag gcccccgtgg tgacaagggt gagacaggcg
     3421 aacagggcga cagaggcata aagggtcacc gtggcttctc tggcctccag ggtccccctg
     3481 gccctcctgg ctctcctggt gaacaaggtc cctctggagc ctctggtcct gctggtcccc
     3541 gaggtccccc tggctctgct ggtgctcctg gcaaagatgg actcaacggt ctccctggcc
     3601 ccattgggcc ccctggtcct cgcggtcgca ctggtgatgc tggtcctgtt ggtccccccg
     3661 gccctcctgg acctcctggt ccccctggtc ctcccagcgc tggtttcgac ttcagcttcc
     3721 tgccccagcc acctcaagag aaggctcacg atggtggccg ctactaccgg gctgatgatg
     3781 ccaatgtggt tcgtgaccgt gacctcgagg tggacaccac cctcaagagc ctgagccagc
     3841 agatcgagaa catccggagc ccagagggaa gccgcaagaa ccccgcccgc acctgccgtg
     3901 acctcaagat gtgccactct gactggaaga gtggagagta ctggattgac cccaaccaag
     3961 gctgcaacct ggatgccatc aaagtcttct gcaacatgga gactggtgag acctgcgtgt
     4021 accccactca gcccagtgtg gcccagaaga actggtacat cagcaagaac cccaaggaca
     4081 agaggcatgt ctggttcggc gagagcatga ccgatggatt ccagttcgag tatggcggcc
     4141 agggctccga ccctgccgat gtggccatcc agctgacctt cctgcgcctg atgtccaccg
     4201 aggcctccca gaacatcacc taccactgca agaacagcgt ggcctacatg gaccagcaga
     4261 ctggcaacct caagaaggcc ctgctcctca agggctccaa cgagatcgag atccgcgccg
     4321 agggcaacag ccgcttcacc tacagcgtca ctgtcgatgg ctgcacgagt cacaccggag
     4381 cctggggcaa gacagtgatt gaatacaaaa ccaccaagtc ctcccgcctg cccatcatcg
     4441 atgtggcccc cttggacgtt ggtgccccag accaggaatt cggcttcgac gttggccctg
     4501 tctgcttcct gtaaactccc tccatcccaa cctggctccc tcccacccaa ccaactttcc
     4561 ccccaacccg gaaacagaca agcaacccaa actgaacccc cccaaaagcc aaaaaatggg
     4621 agacaatttc acatggactt tggaaaatat ttttttcctt tgcattcatc tctcaaactt
     4681 agtttttatc tttgaccaac cgaacatgac caaaaaccaa aagtgcattc aaccttacca
     4741 aaaaaaaaaa aaaaaaaaaa agaataaata aataagtttt taaaaaagga agcttggtcc
     4801 acttgcttga agacccatgc gggggtaagt ccctttctgc ccgttgggtt atgaaacccc
     4861 aatgctgccc tttctgctcc tttctccaca ccccccttgg cctcccctcc actccttccc
     4921 aaatctgtct ccccagaaga cacaggaaac aatgtattgt ctgcccagca atcaaaggca
     4981 atgctcaaac acccaagtgg cccccaccct cagcccgctc ctgcccgccc agcaccccca
     5041 ggccctgggg acctggggtt ctcagactgc caaagaagcc ttgccatctg gcgctcccat
     5101 ggctcttgca acatctcccc ttcgtttttg agggggtcat gccgggggag ccaccagccc
     5161 ctcactgggt tcggaggaga gtcaggaagg gccacgacaa agcagaaaca tcggatttgg
     5221 ggaacgcgtg tcatcccttg tgccgcaggc tgggcgggag agactgttct gttctgttcc
     5281 ttgtgtaact gtgttgctga aagactacct cgttcttgtc ttgatgtgtc accggggcaa
     5341 ctgcctgggg gcggggatgg gggcagggtg gaagcggctc cccattttta taccaaaggt
     5401 gctacatcta tgtgatgggt ggggtgggga gggaatcact ggtgctatag aaattgagat
     5461 gcccccccag gccagcaaat gttccttttt gttcaaagtc tatttttatt ccttgatatt
     5521 ttttctttct tttttttttt ttttgtggat ggggacttgt gaatttttct aaaggtgcta
     5581 tttaacatgg gaggagagcg tgtgcgctcc agcccagccc gctgctcact ttccaccctc
     5641 tctccacctg cctctggctt ctcaggcctc tgctctccga cctctctcct ctgaaaccct
     5701 cctccacagc tgcagcccat cctcccggct ccctcctagt ctgtcctgcg tcctctgtcc
     5761 ccgggtttca gagacaactt cccaaagcac aaagcagttt ttccctaggg gtgggaggaa
     5821 gcaaaagact ctgtacctat tttgtatgtg tataataatt tgagatgttt ttaattattt
     5881 tgattgctgg aataaagcat gtggaaatga cccaaacata atccgcagtg gcctcctaat
     5941 ttccttcttt ggagttgggg gaggggtaga catggggaag gggccttggg gtgatgggct
     6001 tgccttccat tcctgccctt tccctcccca ctattctctt ctagatccct ccataacccc
     6061 actccccttt ctctcaccct tcttataccg caaacctttc tacttcctct ttcattttct
     6121 attcttgcaa tttccttgca ccttttccaa atcctcttct cccctgcaat accatacagg
     6181 caatccacgt gcacaacaca cacacacact cttcacatct ggggttgtcc aaacctcata
     6241 cccactcccc ttcaagccca tccactctcc accccctgga tgccctgcac ttggtggcgg
     6301 tgggatgctc atggatactg ggagggtgag gggagtggaa cccgtgagga ggacctgggg
     6361 gcctctcctt gaactgacat gaagggtcat ctggcctctg ctcccttctc acccacgctg
     6421 acctcctgcc gaaggagcaa cgcaacagga gaggggtctg ctgagcctgg cgagggtctg
     6481 ggagggacca ggaggaaggc gtgctccctg ctcgctgtcc tggccctggg ggagtgaggg
     6541 agacagacac ctgggagagc tgtggggaag gcactcgcac cgtgctcttg ggaaggaagg
     6601 agacctggcc ctgctcacca cggactgggt gcctcgacct cctgaatccc cagaacacaa
     6661 cccccctggg ctggggtggt ctggggaacc atcgtgcccc cgcctcccgc ctactccttt
     6721 ttaagctt
//