LOCUS       HUMFSHD                 3303 bp    DNA     linear   HUM 15-SEP-2007
DEFINITION  Homo sapiens FSHD gene for facioscapulohumeral muscular dystrophy,
            complete cds, 4Z4 tandem repeat unit.
ACCESSION   D38024
VERSION     D38024.1
KEYWORDS    .
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 3303)
  AUTHORS   Lee,J.
  TITLE     Direct Submission
  JOURNAL   Submitted (22-AUG-1994) to the DDBJ/EMBL/GenBank databases.
            Contact:Je Hyeon Lee
            National Institute of Neuroscience, NCNP, Department of
            Neuromuscular Research; 4-1-1 Ogawa-higashi, Kodaira, Tokyo 187,
            Japan
REFERENCE   2
  AUTHORS   Lee,J., Goto,K., Matsuda,C. and Arahata,K.
  TITLE     Characterization of a tandemly repeated 3.3kb KpnI unit in the
            facioscapulohumeral muscular dystrophy (FSHD) gene region on
            chromosome 4q35
  JOURNAL   Muscle Nerve 2, S6-S13 (1995)
COMMENT     
FEATURES             Location/Qualifiers
     source          1..3303
                     /chromosome="4"
                     /clone="c51"
                     /db_xref="taxon:9606"
                     /map="4q35-qter"
                     /mol_type="genomic DNA"
                     /organism="Homo sapiens"
     repeat_region   1..3303
                     /function="unknown"
                     /note="3303bps KpnI fragment in FSHD gene region contains
                     other repeats and sequence motifs"
                     /rpt_family="D4Z4"
                     /rpt_type=tandem
     repeat_region   1..300
                     /function="unknown"
                     /note="region with similarity to Lsau(GenBank X59423),
                     Part of tandem repeat D4Z4"
                     /rpt_type=other
     misc_feature    393..578
                     /function="unknown"
                     /note="extremely G-rich region in 186bps. Part of tandem
                     repeat locus D4Z4"
     repeat_region   398..578
                     /note="microsatellite of GGAGG, Part of tandem repeat
                     locus D4Z4"
                     /rpt_type=direct
     CDS             417..2978
                     /gene="FSHD"
                     /note="ORF"
                     /product="facioscapulohumeral muscular dystrophy"
                     /protein_id="BAA07227.1"
                     /translation="MERGTGETRGAEGTLGGRQGGREAGRNGGRDRATQGLGAGPREP
                     GTDGGRKAGRKSGPRPPGVAGPPASGKTVSVRRGLRAGPTAAAPAGGAPPIRPGSGAQ
                     GVGGFLRDKRPGLGLPSGLHPRGSQTAHPQAEPCNAARGPQTRPRRSHTQDDGGVILV
                     SEWLCPPEGGLLLTSLRPPKGWPCRLFAPGALRHPETCREGCKPGMVPSLSLPGSKPA
                     TLQTPPRCRTRESIVRPSRRGGISSLGSRSGLLRGNEREPHACVCETVPATATPTGIA
                     SFTERGPGTLKTPTEVQFHTPLHPPRLVSPCCRRVGAQRAASRSRGIPGEVRRAGPRN
                     APPSPLPPLPLPLRLSGPTTTTATTPPPPPPPPTTTTTTTPPAGPRPRRPGSLPGWGG
                     LSQGGSPPFMKGWSLPACGPLQGRLAGWLAVRAGLLAAPAAVHSPAEVHGSPPASLCP
                     RPSVKFRPGLTAMALPTPSDSTLPAEARGRGRPRRLVWTPSQSEALRACFERNPYPGI
                     ATRERLAQAIGIPEPRVQIWFQNERSRQLRQHRRESRPWPGRRGPPEGRRKRTAVTGS
                     QTALLLRAFEKDRFPGIAAREELARETGLPESRIQIWFQNRRARHPGQGGRAPAQAGG
                     LCSAAPGGGHPAPSWVAFAHTGAWGTGLPAPHVPCAPGALPQGAFVSQAARAAPALQP
                     SQAAPAEGVSQPAPARGDFAYAAPAPPEPGRSPTLRLLGGLRTRAKAGRTGTRSATAC
                     RAPARWHSLGPLKRGRRPRGACATHVPGESVVGLGPGSPGRRGGVGTPSRGSSTSPAR
                     APGTPPPPRGRGRCKASRRPPRRSRSRRPGLHSPAACCWMSSWRARSFCSRRNLS"
     misc_feature    1405..1554
                     /function="unknown"
                     /note="Extremely C-rich region(69%) in 186bps. Part of
                     tandem repeat locus D4Z4."
     repeat_region   1470..1536
                     /note="microsatellite of CCA, Part of tandem repeat locus
                     D4Z4"
                     /rpt_type=direct
     repeat_region   1590..1703
                     /note="microsatellite of GGCT, Part of tandem repeat
                     locus D4Z4"
                     /rpt_type=direct
     misc_feature    1863..2037
                     /function="unknown"
                     /note="paired type homeodomain seq I,
                     translation;PRRLVWTPSQSEALRACFERNPYPGIATRERLAQAIGIPEPRVQIW
                     FQNERSRQLR"
     misc_feature    2082..2264
                     /function="unknown"
                     /note="paired type homeodomain seq II,
                     translation;GRRKRTAVTGSQTALLLRAFEKDRFPGIAAREELARETGLPESRIQ
                     IWFQNRRARHPGQG"
BASE COUNT          486 a         1257 c         1142 g          418 t
ORIGIN      
        1 ggtaccagca ggtgggccgc ctactgcgca cgcgcgggtt tgcgggcagc cgcctgggct
       61 gtgggagcag cccgggccag agctctcctg cctctccacc agcccacccc gccgcctgac
      121 cgccccctcc ccacccccca ccccccaccc ccggaaaacg cgtcgtcccc tgggctgggt
      181 ggagaccccc gtcccgcgaa acaccgggcc ccgcgcagcg tccgggcctg acaccgctcc
      241 gccggctcgc ctcctcctgt cgcccccggg ccaccgtcgc ccgcccgccc gggcccctgc
      301 gggcccctgc agccgcccag ctgccagcac gggcggctgg cggcggaacg cagaccccag
      361 gcccggcgca caccggggac gctgagcgtt ccaggcggga gggaaggcgg gcagagatgg
      421 agagaggaac gggagagact agaggggcgg aagggacgtt aggagggagg cagggaggca
      481 gggaggcagg gaggaacgga gggagagaca gagcgacgca gggactgggg gcggggccga
      541 gggagccggg gacggacggg gggaggaagg cagggaggaa aagcggtcct cggcctccgg
      601 gagtagcggg accgcccgcc tccgggaaaa cggtcagcgt ccggcgcggg ctgagggctg
      661 ggcccacagc cgccgcgccg gccggcgggg caccacccat tcgccccggt tccggggccc
      721 agggagtggg cggtttcctc cgggacaaaa gaccgggact cgggttgccg tcgggtcttc
      781 acccgcgcgg ttcacagacc gcacatcccc aggctgagcc ctgcaacgcg gcgcgaggcc
      841 cacagacccg gccacggagg agccacacgc aggacgacgg aggcgtgatt ttggtttccg
      901 agtggctttg ccctcccgaa ggcggcctgt tgctcacgtc tctccggccc ccgaaaggct
      961 ggccatgccg actgtttgct cccggagctc tgcggcaccc ggaaacatgc agggaagggt
     1021 gcaagcccgg catggtgcct tcgctctcct tgccaggttc caaacccgcc acactgcaga
     1081 ctcccccacg ttgccgcacg cgggaatcca tcgtcaggcc atcacgccgg ggaggcatct
     1141 cctctctggg gtctcgctct ggtcttctac gtggaaatga acgagagcca cacgcctgcg
     1201 tgtgcgagac cgtcccggca acggcgacgc ccacaggcat tgcctccttc acggagagag
     1261 ggcctggcac actcaagact cccacggagg ttcagttcca cactcccctc caccctccca
     1321 ggctggtttc tccctgctgc cgacgcgtgg gagcccagag agcggcttcc cgttcccgcg
     1381 ggatccctgg agaggtccgg agagccggcc cccgaaacgc gcccccctcc cccctccccc
     1441 ctctccccct tcctcttcgt ctctccggcc ccaccaccac caccgccacc acgcctcccc
     1501 caccaccccc cccccccacc accaccacca ccaccacccc gccggccggc cccaggcctc
     1561 gacgccctgg gtcccttccg gggtggggcg ggctgtccca ggggggctca ccgccattca
     1621 tgaaggggtg gagcctgcct gcctgtgggc ctttacaagg gcggctggct ggctggctgg
     1681 ctgtccgggc aggcctcctg gctgcacctg ccgcagtgca cagtccggct gaggtgcacg
     1741 ggagcccgcc ggcctctctc tgcccgcgtc cgtccgtgaa attccggccg gggctcaccg
     1801 cgatggccct cccgacaccc tcggacagca ccctccccgc ggaagcccgg ggacgaggac
     1861 ggccacggag actcgtttgg accccgagcc aaagcgaggc cctgcgagcc tgctttgagc
     1921 ggaacccgta cccgggcatc gccaccagag aacggctggc ccaggccatc ggcattccgg
     1981 agcccagggt ccagatttgg tttcagaatg agaggtcacg ccagctgagg cagcaccggc
     2041 gggaatctcg gccctggccc gggagacgcg gcccgccaga aggccggcga aagcggaccg
     2101 ccgtcaccgg atcccagacc gccctgctcc tccgagcctt tgagaaggat cgctttccag
     2161 gcatcgccgc ccgggaggag ctggccagag agacgggcct cccggagtcc aggattcaga
     2221 tctggtttca gaatcgaagg gccaggcacc cgggacaggg tggcagggcg cccgcgcagg
     2281 caggcggcct gtgcagcgcg gcccccggcg ggggtcaccc tgctccctcg tgggtcgcct
     2341 tcgcccacac cggcgcgtgg ggaacggggc ttcccgcacc ccacgtgccc tgcgcgcctg
     2401 gggctctccc acagggggct ttcgtgagcc aggcagcgag ggccgccccc gcgctgcagc
     2461 ccagccaggc cgcgccggca gagggggtct cccaacctgc cccggcgcgc ggggatttcg
     2521 cctacgccgc cccggctcct ccggagccgg ggcgctctcc caccctcagg ctcctcggtg
     2581 gcctccgcac ccgggcaaaa gccgggagga ccgggacccg cagcgcgacg gcctgccggg
     2641 cccctgcgcg gtggcacagc ctgggcccgc tcaagcgggg ccgcaggcca aggggtgctt
     2701 gcgccaccca cgtcccaggg gagtccgtgg tggggctggg gccggggtcc ccaggtcgcc
     2761 ggggcggcgt gggaacccca agccggggca gctccacctc cccagcccgc gcccccggga
     2821 cgcctccgcc tccgcgcggc aggggcagat gcaaggcatc ccggcgccct cccaggcgct
     2881 ccaggagccg gcgccctggt ctgcactccc ctgcggcctg ctgctggatg agctcctggc
     2941 gagcccggag tttctgcagc aggcgcaacc tctcctagaa acggaggccc cgggggagct
     3001 ggaggcctcg gaagaggcgc ctcgctggaa gcacccctca gcgaggaaga ataccgggct
     3061 ctgctggagg agctttagga cgcggggttg ggacggggtc gggtggttcg gggcagggcg
     3121 gtggcctctc tttcgcgggg aacacctggc tggctacgga ggggcgtgtc tccgccccgc
     3181 cccctccacc gggctgaccg gcctgggatt cctgccttct aggtccaggc ccggtgagag
     3241 actccacacc gcggagaact gccattcttt cctgggcatc ccggggatcc cagagccggc
     3301 cca
//