LOCUS       EU294177                7267 bp    DNA     linear   BCT 26-AUG-2008
DEFINITION  Escherichia coli serogroup O87 O antigen gene cluster, complete
            sequence.
ACCESSION   EU294177
VERSION     EU294177.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 7267)
  AUTHORS   Liu,B., Knirel,Y.A., Feng,L., Perepelov,A.V., Senchenkova,S.N.,
            Wang,Q., Reeves,P.R. and Wang,L.
  TITLE     Structure and genetics of Shigella O antigens
  JOURNAL   FEMS Microbiol. Rev. 32 (4), 627-653 (2008)
   PUBMED   18422615
REFERENCE   2  (bases 1 to 7267)
  AUTHORS   Liu,B., Knirel,Y.A., Feng,L., Perepelov,A.V., Senchenkova,S.N.,
            Wang,Q., Reeves,P.R. and Wang,L.
  TITLE     Direct Submission
  JOURNAL   Submitted (23-NOV-2007) TEDA School of Biological Sciences and
            Biotechnology, Nankai University, 23# HongDa Street, Tianjin
            300457, P. R. China
FEATURES             Location/Qualifiers
     source          1..7267
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /db_xref="taxon:562"
                     /note="serogroup: O87"
     misc_feature    158..7192
                     /note="O antigen gene cluster"
     gene            158..1309
                     /gene="wzx"
     CDS             158..1309
                     /gene="wzx"
                     /note="O antigen flippase"
                     /codon_start=1
                     /transl_table=11
                     /product="Wzx"
                     /protein_id="ACA24898.1"
                     /translation="MRVLGPEFFGVLGFCQASMQYLVLITDYGFNWTATQQIAKHRDN
                     KLKINEIFWSVIFAKIFLASISFILLFVICWNVPKYHDVWYVLLSFSPFLLGNVIYPI
                     WFFQGMEKMKAITVCTISARFLLIPLTFLLVHNSDDIWIAAIIQSSVNLLAGILSCCL
                     IFKYRWISGFEFNLIKVKESICEGWHVFISTSAISLYTTSTTVILGFVAGNTAVGYFN
                     AANTVRNAAQGLLNPVTQAIYPRVSALYEIDYVKALNLIKKMLRYIGGIAFLGSVILF
                     FLAPVIIKYGLGDSYNNAVSILRWMAFLPFIILLSNIFGVQTMLTHGYKKQFSNILTV
                     SGVLNLCIIYPLVCLYSADGAAIAMLITEIFVSFTMFCFLKNKKINIFK"
     gene            1317..2432
                     /gene="glf"
     CDS             1317..2432
                     /gene="glf"
                     /note="UDP-galactopyranose mutase"
                     /codon_start=1
                     /transl_table=11
                     /product="Glf"
                     /protein_id="ACA24899.1"
                     /translation="MKKYDYLIVGAGLYGSVCAHELAKKGKKILIIDKRKHIAGNVYT
                     YQKHGIHVHQYGAHIFHTNDKKIWDYVNSFVEFNRFTNSPLAIYQGELYNLPFNMNTF
                     YQLWGVKTPAEALKIIDEQRKELDGRLPANLEEKAISLVGRDIYEKLIKGYTEKQWGR
                     QARDLPAFIIARLPVRLTFDNNYFSDRYQGIPVGGYTRLVEKMIDSENIDIKLNTDYL
                     AHKEEYNSFSGKIIYTGPIDEYFNYQYGRLEYRSLRFETEVLNIDNFQGNAVVNYSDR
                     DVPYTRIIEHKHFDPVDTSKTIITREYPSECKETNEPYYPINDDKNMELYKKYRLLSK
                     EIHQNVHFGGRLAEYKYYDMHQVIASALKFIEDEING"
     gene            2416..3189
                     /gene="wfgL"
     CDS             2416..3189
                     /gene="wfgL"
                     /note="glycosyltransferase"
                     /codon_start=1
                     /transl_table=11
                     /product="WfgL"
                     /protein_id="ACA24900.1"
                     /translation="MRLMANIYIATHKNYPFPPGYIPLHVGKRLSSVYVPNAIGDDSK
                     NNISDLNPFFCELTGLYWIWQNDADDVIGLVHYRRYFKHKNDYITIKNKKIASCNDLI
                     KEFDSYDLILPKPSYLFKKTLKEQYIKYHHEDDLIKLRQIIEKKYPDYISTFDTVLNG
                     NKGYYCNMFIAKKNIIEPYFQWVFDILFELKSSLDISGYDDYQKRVFGFLSERLFAVW
                     IEYNKNRIQITHRSVVEIESNKVISVKRYIRNFLAKLTG"
     gene            3192..4346
                     /gene="wzy"
     CDS             3192..4346
                     /gene="wzy"
                     /note="O antigen polymerase"
                     /codon_start=1
                     /transl_table=11
                     /product="Wzy"
                     /protein_id="ACA24901.1"
                     /translation="MSYIISVSTLIVILFFERNRMILPLSAVIISGVVVFICLSGNYY
                     NGYDWINYLDNFQCLKYLGVYCWDKYDISYTLIVYLCSKIFDDYHSVVVVISVINTYC
                     LCFFAYKNCRNPLLFITIYFSLYAWVLYSETLRQSLAVSFVFLSLSAFNKKHFVKSLA
                     LLGVASTFHISALFFLVLYIIKYKTNLTYKLFVLSFLIGAVFLPYIAASFFYLNPRIL
                     SYFLDTNWNEAQNLSIIQVSTTLFSLLLLFMVWRRLYNNNMLITKDEWGKANLIMFSL
                     FSVLFALGKLHAQYFEVLSRVNYYIAIPAFVYLSNRIEDLRVTNKIIIYPFLMVYLLY
                     SPVRFFYSLQKFDEYNNFYVDKLLLNKKINIEYLKNTRCSYLGNKFSACK"
     gene            4362..4928
                     /gene="wfgM"
     CDS             4362..4928
                     /gene="wfgM"
                     /note="acetyltransferase"
                     /codon_start=1
                     /transl_table=11
                     /product="WfgM"
                     /protein_id="ACA24902.1"
                     /translation="MNGRGLFKLIKYPLNLIVCILRLIPNFLLVLVYDFLTMVPTKIG
                     VLLRYVVVKSMSRNIGDNVYIGRYVILKNIKKLHIGNNVSIHEFCYIDAVGSITIGDN
                     VSIAHNSSIISFEHTWSDTSVPIKYNKIITKPIIIEDDVWIGCGVRILSGSYVSKRSI
                     LAAGTVFKLSGEKNSIYVGMPSKKKKEI"
     gene            4891..6117
                     /gene="wfgN"
     CDS             4891..6117
                     /gene="wfgN"
                     /note="glycosyltransferase"
                     /codon_start=1
                     /transl_table=11
                     /product="WfgN"
                     /protein_id="ACA24903.1"
                     /translation="MWGCPLKRRKKFNMKILFCHDHIFKEYEGEYYSPGKISIEQMES
                     YQSLGDNITIVARSNKVTSLDSSKHNQITHNRVSFCAFPNASNFKSLILRKELLKKAI
                     KIASQYDIIIARLPSEIGSIFAQAGRKLNKPVLIEVVACVWDNLYYFGTIKAKLYAPL
                     AYVRMRNLVKQADFVHYVTSSFLQKRYPTKKGALTLSASDVILSTVSNSQRNLKKNNE
                     ISIGVVGSMDNKIKGISLAIKAVKILSNSTNEKITLHIIGPGDKKKYQNLASRLNLLE
                     NINFVGSLSDSKAVQKYLNEYIDIYIQPSYQEGMPRAVLEAMSCGIPCIVSCAGGMPE
                     IISSDYVHAKGDYKQLAHLIKKISSSEKIYRQESEKNHLLANKFSHERLIYKKNKFYN
                     LIKDRISEGSVYYNKS"
     gene            6086..7192
                     /gene="wfgO"
     CDS             6086..7192
                     /gene="wfgO"
                     /note="glycosyltransferase"
                     /codon_start=1
                     /transl_table=11
                     /product="WfgO"
                     /protein_id="ACA24904.1"
                     /translation="MKVLYIITKADEIGGAQIHIRDLSSRLKEDGHDVVVIVGEHGAL
                     VDELIKRGVAYHIVPSLVREINPIKDLRAVIEISKLISILDPDIISLHSSKAGIIGRL
                     AALRKKKPVIFTAHGWAFANGVSKNRQKLYCIIEKIIEPLASKIITVSEQDKQLALEL
                     NVSSHEKQVVIHNGMMQSSLPPRFVNRTSNKTVELISVARFSEQKDHRTLFVALSQIN
                     NLNWRLTLVGKGPLLEYYKTLARKLNIHERIQFLGERHDVAELMVRSDVFLLISKWEG
                     FPRSILEAMRAGLPVIASNVGGTSEAINDGITGFLVEREDVDGLKHKLCKLLSEPELC
                     FNMGQAGYQSFISNFTFDVMYQKTYYLYESLLKK"
ORIGIN      
        1 aatggtagct gtaagccaag ggcggtagcg tgtatatttt ttttgtatcg agatttccaa
       61 attgaaatta aatacaacat taaacaaaaa tatcatatac ttgggacttg ttcagggtag
      121 ttcatacata ctacctctta ttacgtttcc atatttggtg agggttcttg ggccggagtt
      181 ttttggggta ttgggttttt gtcaagcatc aatgcaatat ttggtgctga taacggatta
      241 cggattcaac tggactgcaa cccaacaaat tgcaaaacat cgtgataata agttgaagat
      301 taatgaaatt ttttggtctg taatatttgc aaaaatattt ctggcctcta tctcttttat
      361 cttgttattc gtcatatgtt ggaatgttcc caaatatcat gatgtttggt atgtattatt
      421 gtcattttca ccttttcttt taggcaatgt gatttatcct atatggtttt ttcaaggaat
      481 ggagaaaatg aaagcgatta ctgtatgtac aatttcagct cgatttttac ttattccatt
      541 gacatttttg cttgttcata attcagatga tatttggatt gctgctatta tccagagttc
      601 tgtgaattta ttagcgggaa tattatcatg ttgtttaata tttaaatata gatggatttc
      661 tggatttgaa tttaacttaa ttaaagttaa agagtctatt tgcgaaggtt ggcatgtgtt
      721 tatatcaaca tcagcaataa gtttatatac tacaagcaca actgttatat tagggtttgt
      781 agctggaaat actgcagtag gttattttaa tgcggcaaat actgtcagaa atgctgctca
      841 gggattgttg aatccagtta cacaagctat ttatcccaga gtaagtgcat tgtatgagat
      901 tgattacgta aaagctctaa acttgataaa aaaaatgttg agatatattg ggggaatagc
      961 gtttttgggc tctgtaatac tattctttct cgctcctgta attattaagt atggtttggg
     1021 agatagttat aataatgctg tcagcattct aagatggatg gcatttttac cctttataat
     1081 tttgttgagt aatatctttg gtgtgcaaac tatgttgact catggatata aaaaacaatt
     1141 tagcaatata ttgactgtaa gtggtgtttt aaatctatgc attatatatc ctttggtttg
     1201 tttatattca gcagatggtg ctgcgatagc aatgttaata actgaaatat ttgtatcctt
     1261 tacaatgttt tgttttttga aaaataaaaa aatcaacata tttaagtgaa tttgatatga
     1321 aaaagtatga ttatttaata gttggtgctg gattgtatgg ttctgtttgt gcacatgaac
     1381 ttgcaaaaaa aggaaagaag atattaatta ttgataaaag gaaacatata gctggcaatg
     1441 tctatacata tcaaaaacat ggcattcatg tgcaccagta tggagcgcat atattccata
     1501 cgaatgataa aaaaatttgg gattatgtaa attcattcgt tgagtttaac cgatttacta
     1561 actcaccatt agcaatttat caaggggagt tatataatct gccatttaat atgaatacgt
     1621 tttaccagtt gtggggagta aaaactccgg ctgaagcttt aaaaattatt gatgaacagc
     1681 gcaaagaact tgatggtcga ctgccggcaa atttggaaga gaaagcaata tctcttgtcg
     1741 gaagggatat atatgagaaa ttgataaagg gatatactga aaaacaatgg ggacgacagg
     1801 cacgtgactt accggcattt ataatagcac gactaccagt acgtctaact tttgacaata
     1861 attatttctc tgatcggtat caaggtattc cagttggagg atataccaga ttagttgaga
     1921 aaatgataga ttctgaaaac attgatataa agttgaatac tgattactta gcacataagg
     1981 aagagtataa tagttttagc ggcaaaatta tatacacagg tcctattgac gagtatttta
     2041 attatcagta cgggcgattg gaataccgtt ctttgcggtt tgaaactgaa gtgcttaaca
     2101 ttgataactt tcaaggaaat gccgttgtta attatagcga tagggatgtt ccatatacaa
     2161 gaattatcga gcacaagcat tttgatcctg ttgacacatc aaagaccatc ataacaagag
     2221 aatatccatc agaatgtaag gaaacaaacg agccttatta tccaatcaat gatgataaaa
     2281 atatggaatt gtataaaaaa tataggcttc tttctaaaga aattcatcaa aatgtacatt
     2341 ttggtggtcg attggctgag tataaatatt acgacatgca ccaggttata gcatcagcat
     2401 tgaaatttat tgaggatgag attaatggct aatatttata tagctacgca taaaaactat
     2461 ccattcccac caggctatat tcctttacat gttggtaagc gattatcaag tgtatatgtt
     2521 cctaacgcta taggcgatga tagcaaaaat aatatatccg acttaaatcc ttttttctgc
     2581 gaactgactg gtctatattg gatatggcaa aatgatgctg atgatgtcat aggtcttgtc
     2641 cattatcgtc gttactttaa acataaaaat gactatataa caataaagaa taaaaaaatt
     2701 gcatcttgca atgatttaat caaggagttt gatagttatg atcttatttt accaaaacct
     2761 tcatatcttt ttaaaaaaac gttgaaagag caatatatta aatatcatca tgaagatgat
     2821 ttgataaagc ttagacaaat tattgaaaaa aaatatccag actatatctc aacatttgat
     2881 acggtactga atggtaacaa gggatattat tgcaatatgt ttatagcaaa aaaaaacatc
     2941 atagaaccat attttcagtg ggtgtttgat attttgttcg agcttaagtc atctctagat
     3001 atctcaggat atgatgatta tcaaaaacgt gttttcggtt ttctttcaga aagactattt
     3061 gccgtttgga tagaatataa taaaaacaga atacaaatta cgcacagatc tgttgttgaa
     3121 attgaatcaa acaaagttat ttctgtaaaa agatatatta gaaatttttt ggctaaactg
     3181 actggatagt tatgtcgtac attatatctg tatcaacact tatagttatt ctgttttttg
     3241 aaagaaatag gatgatattg cctctttcag ctgttattat aagcggcgtt gttgttttta
     3301 tttgtttgtc tgggaattat tataatggat atgattggat taattatctg gataactttc
     3361 aatgtcttaa atatttaggg gtttattgct gggataaata cgatataagc tatactctca
     3421 ttgtctattt gtgttctaaa atattcgatg attatcatag cgttgtagtt gtaatatcag
     3481 taattaatac atactgcctt tgcttttttg cttataagaa ttgtaggaat cccttattat
     3541 tcattactat ttatttttca ttatatgctt gggtgttata cagtgaaaca ttaagacaat
     3601 ctttagcagt atcatttgtt tttttatcat tatctgcttt caataaaaaa cattttgtaa
     3661 aatctctggc tttattaggt gtcgcttcaa cttttcatat ttctgcttta ttttttttag
     3721 tgctgtatat tataaaatat aaaacaaatc ttacctataa gttatttgtt ttatcatttt
     3781 taataggggc tgtttttttg ccgtatatag cggcatcatt tttttatctt aatcctcgta
     3841 tattaagtta ttttttagat acaaactgga atgaagcaca aaatctcagt atcattcaag
     3901 tcagcacaac acttttctcg ctgctcttat tatttatggt atggagacgg ttatacaata
     3961 ataatatgct tataactaag gatgaatggg gaaaagcaaa tttaattatg ttttctcttt
     4021 ttagtgtttt atttgcctta gggaaattgc atgcgcaata ttttgaggtt ctttctcgtg
     4081 tgaattatta tatagctata cctgcatttg tttatttgtc aaataggatt gaagatttac
     4141 gcgtgacaaa taaaataata atatatccat ttttgatggt ttatttgctc tactctcctg
     4201 ttagattctt ttatagttta caaaaatttg atgaatataa taatttttat gtggacaagc
     4261 ttttattgaa taagaaaata aatatagaat atttgaaaaa tacaaggtgt tcttatctgg
     4321 gaaacaaatt ctctgcatgc aaatagtatg aggtttaact aatgaatggg cgtggattat
     4381 ttaaattaat caaatatcct cttaatctaa tagtatgcat attaagactg attcccaatt
     4441 tcttattggt cttagtttat gattttctca ctatggtacc aacaaaaatt ggtgttttgt
     4501 tgcgatatgt tgttgttaaa agcatgagta ggaatattgg ggataatgtt tacatcggtc
     4561 gttatgtgat tttgaaaaat ataaaaaaat tacacattgg caataatgtt tcgattcatg
     4621 agttttgtta tattgatgct gttggaagta ttactattgg tgataatgta tctatcgcac
     4681 ataactcatc gattatatca tttgagcata cttggtccga tacaagtgtg cccataaaat
     4741 ataataagat tattacaaag ccgataatca tcgaagatga tgtatggatt ggttgcggtg
     4801 ttagaattct ttcaggttcc tacgtatcta agcgaagcat acttgcagct ggcactgtct
     4861 ttaaattatc tggcgaaaaa aacagtatct atgtggggat gccctctaaa aagaagaaag
     4921 aaatttaata tgaaaatact attctgtcat gaccatatat ttaaggaata tgaaggtgaa
     4981 tattactcac cagggaaaat atccatagaa caaatggaaa gttatcaatc tctgggagat
     5041 aatataacaa ttgtcgccag atccaataaa gttacctctt tagattctag taagcataat
     5101 caaataactc ataatagagt ttcattctgt gcgtttccta acgcatctaa ctttaaatca
     5161 cttatcctgc ggaaggaatt attaaaaaaa gcgattaaaa ttgcaagtca atacgatatt
     5221 attatagcaa ggctaccatc tgaaattggt agtatattcg ctcaagctgg gcgtaaattg
     5281 aacaaaccgg ttttaataga agttgtagca tgtgtttggg ataatttata ttactttggt
     5341 acaattaagg ctaaattata tgcacctctt gcttatgtaa gaatgcgtaa tttagtaaaa
     5401 caagctgatt ttgtccatta tgttacttcg tcatttttac aaaaaaggta tccaacaaaa
     5461 aaaggggctt taactttatc tgcatcggat gtaatattat caacagtttc taactctcaa
     5521 agaaatctca aaaaaaataa tgagatttca attggtgttg ttggttctat ggataataaa
     5581 attaaaggaa tttcattagc aattaaagct gttaaaatat tgtcaaatag cacaaatgag
     5641 aaaatcactc ttcatattat tggtcctggt gacaagaaaa aataccaaaa tttagcctct
     5701 aggctaaatc ttttagaaaa tataaacttt gtggggtctt tgtctgattc aaaagctgta
     5761 caaaaatact taaatgaata tattgatatt tatatacaac catcttacca agaaggtatg
     5821 ccgcgagctg ttttagaagc tatgagttgt ggtattcctt gcattgttag ttgtgctgga
     5881 ggcatgcctg aaattatatc ttcagattat gttcatgcaa aaggtgatta taagcagtta
     5941 gctcatttaa ttaaaaaaat ttcttcttct gaaaaaatat atcgacaaga gtcagaaaaa
     6001 aatcatcttt tagcgaataa attttcgcat gagcgattga tatataaaaa aaataaattt
     6061 tataatttaa ttaaggatag aataagtgaa ggttctgtat attataacaa aagctgacga
     6121 aattggtggt gcacagattc acatacggga tttatcctca cgcttaaaag aggatggaca
     6181 tgatgtggtt gtaattgtag gtgaacatgg tgcattagtt gatgagttga ttaagagggg
     6241 agtggcgtat cacatcgttc cttctcttgt gcgtgaaatt aaccctataa aagacttaag
     6301 ggcagttata gaaataagta agttaattag tatattagat ccagatataa tttcattaca
     6361 ctcatcaaaa gccggtatta tagggcgact ggcagcttta cgaaaaaaaa aaccagttat
     6421 attcactgct catggatggg catttgcaaa tggagtaagt aaaaataggc aaaagttata
     6481 ttgtataata gaaaaaataa ttgaacctct agcaagtaaa attattacag tttctgaaca
     6541 agacaaacag ttagcattgg aattaaacgt ttcctctcat gaaaaacagg ttgtaataca
     6601 taatggcatg atgcagtcat cattgcctcc tcggtttgta aatagaactt ctaataagac
     6661 tgtcgagttg atatctgtag cacgattctc agagcaaaag gatcatagaa ccctttttgt
     6721 tgccttatct caaataaata atttaaactg gagattaacc ttagtaggta aaggcccact
     6781 tttagaatac tataagacgt tggctcgaaa attgaacatt catgaacgta ttcagtttct
     6841 tggtgagcga catgatgtcg ctgaattaat ggttaggagt gatgtgtttt tattgatatc
     6901 aaaatgggaa ggctttccgc gtagtattct tgaggctatg agagctggtt taccagttat
     6961 tgcttctaat gtaggaggaa catctgaagc tattaatgat ggaattacag ggtttcttgt
     7021 ggaaagagaa gatgttgatg ggttgaaaca taaattatgt aaattactat cagaacctga
     7081 gttgtgtttc aatatggggc aagctggtta tcagtctttc attagtaatt ttacttttga
     7141 tgttatgtac caaaaaactt actatttata cgaaagttta ttaaaaaaat gatttaattg
     7201 tttggataat aactgacagg agtaaacaat gtcaaagcaa cagatcggcg tcgtcggtat
     7261 ggcagtg
//