LOCUS       KJ778788                7791 bp    DNA     linear   BCT 29-MAR-2016
DEFINITION  Escherichia coli strain H 319 serotype O96:H19 O-antigen gene
            cluster, complete sequence.
ACCESSION   KJ778788
VERSION     KJ778788.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 7791)
  AUTHORS   DebRoy,C., Fratamico,P.M., Yan,X., Baranzoni,G., Liu,Y.,
            Needleman,D.S., Tebbs,R., O'Connell,C.D., Allred,A., Swimley,M.,
            Mwangi,M., Kapur,V., Raygoza Garay,J.A., Roberts,E.L. and Katani,R.
  TITLE     Comparison of O-Antigen Gene Clusters of All O-Serogroups of
            Escherichia coli and Proposal for Adopting a New Nomenclature for
            O-Typing
  JOURNAL   PLoS ONE 11 (1), E0147434 (2016)
   PUBMED   26824864
  REMARK    Publication Status: Online-Only
REFERENCE   2  (bases 1 to 7791)
  AUTHORS   Yan,X., Fratamico,P.M., Tebbs,R.S., O'Connell,C.D., Swimley,M.,
            Baranzoni,G.M., Debroy,C. and Liu,Y.
  TITLE     Direct Submission
  JOURNAL   Submitted (30-APR-2014) Molecular Characterization of Foodborne
            Pathogens Research Unit, USDA-ARS, 600 East Mermaid Lane, Wyndmoor,
            PA 19038, USA
COMMENT     ##Assembly-Data-START##
            Assembly Method       :: CLC Genomics Workbench v. 7.0
            Coverage              :: >50X
            Sequencing Technology :: IonTorrent
            ##Assembly-Data-END##
FEATURES             Location/Qualifiers
     source          1..7791
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /strain="H 319"
                     /serotype="O96:H19"
                     /db_xref="taxon:562"
     misc_feature    1..7791
                     /note="O-antigen gene cluster"
     gene            72..1319
                     /gene="wzx"
     CDS             72..1319
                     /gene="wzx"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen flippase"
                     /protein_id="AIG62624.1"
                     /translation="MKINTTLNKNILYLAVVQGSSYILPLITFPYLVRVLGPELFGVL
                     GFCQASMQYLVLLTDYGFNWTATQQVAKNKNDIVKLTRIFWSVFFAKVFLASISFILL
                     AACCFLIERYKELWFVLFSFSPLVLGNVIYPVWFFQGMEKMKWITICTITARCLVIPL
                     TFIFVKNGQDVWVAALIQGMVNLLAGLLGLCLIKKKRWVNTIIVDYIDIKQCLKDGWH
                     VFISTSAISLYTTSTTVILGFVAGPLAVGYFNVANTIRNAAQGLLTPFTQSIYPRINA
                     VFDSDYLQAIKIIKKSLRYVGGLAFLGSVFLYLLAPFIIKIGVGGGYEESISVLRIMA
                     FLPFIIVLSNIFGVQTMLTHNYKRQFSQILLVSGVANLIIIYPLVIMYSANGAAISLL
                     VTELFVTAQMYLFLRAKKIYLIK"
     gene            1331..2428
                     /gene="glf"
     CDS             1331..2428
                     /gene="glf"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-galactopyranose mutase"
                     /protein_id="AIG62625.1"
                     /translation="MYDYLIVGAGLFGAVCAHELNKKGKKLLVIDRRDHIAGNAYTEN
                     NGGIQVHKYGAHIFHTNDKDIWNYVNSFVEFNRFTNSPLASYGDKLYNLPFNMNTFYQ
                     LWGVKTPAEALAIINEQRKVLGDKTPSNLEEKAISLVGFDIYEKLIKGYTEKQWGRKA
                     TDLPSFIINRLPVRFTFDNNYFSDLYQGIPIGGYTKMVEKMLDGIETRLGVDFLKNRD
                     SFRKIADKIIYTGPIDEFYDYCFGMLEYRSLRFENEVINSNNYQGNAVINYTESHVPY
                     TRIIEHKHFDPIETNHTVITREYPCEWKKGDEPYYPVNDNLNMELFKKYKQLANSENN
                     IIFGGRLGEYKYYDMHQVIKSALNLMVKLDK"
     CDS             2425..3339
                     /codon_start=1
                     /transl_table=11
                     /product="N-glycosyltransferase"
                     /protein_id="AIG62626.1"
                     /translation="MKVQHRVAVVLVTYNRVALLDRAIQALYAQTYPVHKIIVVNNAS
                     TDDTANYLAQKNDIVHLQLPKNTGGAGGFHEGIKKACSMDVDFIWVMDDDAVPNNNTL
                     KGLINDAQFLEASGDNWGFLCSKVVSEDNESINVPSISRKRNRSGYLSWSEKAENGLI
                     GVDTATFVSVLMKKEIPFLVGLPVKEMFIWGDDTEYTWRISTLKNCYLSANSTIYHKR
                     ISASALSVVTEKEKTRLNWYIYYYRNNLYARRKHGSKKDMLVFLISTIKDIKDIFCKS
                     KNNKLLRLKCLLIGIFKGVVFNPQIKNV"
     gene            3377..4627
                     /gene="wzy"
     CDS             3377..4627
                     /gene="wzy"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen polymerase"
                     /protein_id="AIG62627.1"
                     /translation="MTSSNKILSLLFGVILFLPLFPVQLNETQYHVTLAILLGLILIF
                     LMSWKQLGGFKGGSPVLYYYLCLQLLLILSGVMGVDTVKEYQDLISLFRPMVLLSSFY
                     LLYCLTSRNRDFFRTLIKPMKFICMAMFIWAVYESIIQNNIFQSINYLLYKMERKDDI
                     KNVAVTFFFLPYYSAFVSVFMIAFFYTCRYTYKIRGTYKYIFMSFCCIVLTQSKTGVV
                     AALFLIVCCELISSSQLKRIFCAVCVIILFFISFYFYDYLLMYLNLYFPGNFTRTVSQ
                     LADDPSSAYTLSARIEQFLSIYSQTMQHIPFIGYGLGRDLLIESWPATIYVRYGWIGI
                     CVFLIFNFFLIFKGICYYLTHTSNAFRYQAIIFSFWIISTLISQMSAMMTENTKTSFI
                     YLVYLVIFMQLIYKPHKINKSNLG"
     gene            4632..5753
                     /gene="mfpsA"
     CDS             4632..5753
                     /gene="mfpsA"
                     /codon_start=1
                     /transl_table=11
                     /product="mannosylfructose-phosphate synthase"
                     /protein_id="AIG62628.1"
                     /translation="MGKHALLVNGSYLPTVGGVENSIRSIAAELVKEGWKVDVVCSDE
                     GKYSRYEKTNNVSVYRYAKKGFFRSLYNSFRVLKEINKKYQLIICRNHTLFIILKFAG
                     YKNIHYIIPGVYHYQNKSEMRGSLSKKIKYASNVFIQIVSFNISKLKYVFSPTMEDQV
                     CSVSIFNKSVKKIYPGVDDTRFYKLDRKLKNNLKLALGFDNDSKIVLGLGRFVDVKNF
                     EHLIKCVPYLPCDYKVVLVGGGDNLTVYKKIIGELNLINRVFIFESTLEPEKFYQIAD
                     VFCLTSTYEPFGQVLIEASFCGLNIVAFDSDLKGIDTATNSIFIHDCDFYFPVSSFEL
                     SDFAKEIINAVTCNRSSLCQNEFNKQYNWKALIERLSID"
     gene            5731..6693
                     /gene="epsJ"
     CDS             5731..6693
                     /gene="epsJ"
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyl transferase"
                     /protein_id="AIG62629.1"
                     /translation="MRDYQLIKYSFIIPCYNVSKYILTAIESIPVREDIEIIIIDDGS
                     TDGLSNAISSYKGNKIIYFHKENGGVSSARNKGIDLATGKYLLFFDGDDYYSEDLINY
                     LDKGFDNAQAKMITFGYNHLANNRLKYYSPGTTGIKSTRVLIGQILLRQSFQCMCSFA
                     VESDLVKNNNLRFNEVTYYYEDIEFQLKVMSLMNFVFVIDKPLFFYISRKKSATNSFV
                     NEKHFSLFYAIDRLENILDADMNDSLTYFRWYSLFWIFRLAFKNGCTKKSISILKTLK
                     LSNFKYNLIPCSQGRFKYNLIWLFQKFPIPFISFILNLRAIRNN"
     gene            6708..7631
                     /gene="glfT1"
     CDS             6708..7631
                     /gene="glfT1"
                     /codon_start=1
                     /transl_table=11
                     /product="galactofuranosyl transferase"
                     /protein_id="AIG62630.1"
                     /translation="MNTHKVIAVIVTYRRKNFLDKVLSALLNQTVPLHKVIVVDNNSS
                     DGTDDIVKEYINKFPEVISYHNTHENLGGAGGFYTGMKLIEKYNYNYDYAWLMDDDLI
                     PNNDCLEIMLTANIKGIIQPMRFNLDGSCAELSALKYDLNSFFRLKPKGDTVKDFVAS
                     NPLLPDFINIETVPFEGPLIHKDIVTAVGLPDPRFFIFGDDTDYSIRTLKLNYPIICM
                     PRARASRLLINNQRNDLLSWKGFFMLRNLFHLYFKHGNGYIARTKPYIIAVCYLLLCL
                     VKGNFKQSLITVDALKSARLLMNNDKYKPKA"
ORIGIN      
        1 ctctggtagc tgtaaagcca ggggcggtag cgtgtgtaaa aaacacatta acattattct
       61 gatggatacg tgtgaaaata aatacgactt taaataaaaa tattttatat ttagctgttg
      121 ttcaagggag ttcatatatt cttccattaa tcacttttcc ttatttagtg agagtgttag
      181 gtcccgagtt atttggggtg ctaggttttt gtcaggcatc aatgcaatac ttggtgttat
      241 taactgacta tggatttaat tggacggcaa ctcagcaagt agctaaaaat aaaaatgaca
      301 tcgtaaaatt aactaggatt ttttggtctg ttttttttgc aaaagtattc ttggcaagta
      361 tttcgtttat acttttagct gcatgttgtt ttcttattga aagatataaa gaattgtggt
      421 ttgtattatt ttcatttagt cccttggtat tggggaatgt gatttatcct gtatggtttt
      481 ttcagggtat ggaaaaaatg aaatggataa ctatatgtac tattacggct cgatgtttag
      541 tgataccttt aacatttatc tttgttaaaa atggacaaga cgtatgggtt gctgcactta
      601 tccaggggat ggttaactta ttagcaggcc tactgggatt atgtttgatt aagaaaaaaa
      661 gatgggttaa tacgattatc gttgattata ttgatattaa acagtgctta aaggatggtt
      721 ggcacgtgtt tatttcaaca tcagctataa gtctttatac aacaagcaca acagtgattc
      781 tcgggtttgt ggcgggacct ttggctgtag ggtattttaa tgttgcaaat actatacgta
      841 atgccgcgca agggttgcta acacctttta ctcaatctat atatcctaga attaacgcgg
      901 tctttgatag tgattactta caggcaataa aaataataaa aaaatcgcta cgctatgttg
      961 gaggtttagc ttttttggga tcagtattct tatatctatt ggctccattt ataataaaaa
     1021 taggtgtcgg aggggggtat gaagagtcaa tatctgtttt gcgaataatg gcatttttac
     1081 catttattat tgtgttaagt aatatatttg gtgttcaaac aatgttgact cataattata
     1141 agagacagtt tagccaaata ctgctagtta gtggcgttgc aaatcttata attatttatc
     1201 cattagtaat aatgtattct gctaatggtg ctgcaatttc tttattagta acggagttgt
     1261 ttgttacggc tcaaatgtac ttatttttac gggctaaaaa aatatattta ataaagtaat
     1321 aggagatgat atgtacgact atcttattgt tggtgcgggg ttgtttgggg ctgtttgtgc
     1381 gcatgagttg aataaaaaag ggaaaaaatt attggtaatt gatcgaaggg accacattgc
     1441 tggtaatgct tacactgaaa ataatggtgg tattcaagtt cataaatatg gggcgcatat
     1501 attccataca aatgataaag atatatggaa ttatgttaat agttttgttg agtttaatcg
     1561 gtttactaat tcacctcttg ctagttatgg agataaactt tacaatttac cctttaatat
     1621 gaatacgttt tatcagcttt ggggagttaa gacgcctgct gaagcattag ctataattaa
     1681 tgaacagcga aaagttttag gtgataagac acctagtaat cttgaggaaa aggctatttc
     1741 tttagtcggc tttgatattt atgaaaaatt gatcaaagga tatactgaaa agcaatgggg
     1801 aagaaaggcc actgatcttc catcttttat tatcaatcgg ttgccagtac gatttacttt
     1861 tgacaataat tatttttctg atctttatca ggggatacct attggtggtt ataccaagat
     1921 ggtggaaaaa atgttagatg gtatagaaac taggcttggt gtcgattttt taaagaatag
     1981 agattctttc agaaagatag ctgataaaat tatttatact gggccgatag atgagtttta
     2041 tgattactgc tttggaatgc tcgaatatag atctttaaga tttgaaaatg aagttattaa
     2101 tagtaataat tatcaaggaa atgctgttat aaattatact gaatcacatg taccatacac
     2161 aagaataata gaacacaaac actttgaccc aattgaaaca aaccatacag ttatcacaag
     2221 agaatatcct tgtgaatgga aaaaaggtga tgaaccttac tatccagtaa atgacaatct
     2281 gaacatggaa ttattcaaaa aatataaaca attagcaaat tctgaaaata atatcatttt
     2341 tggtggacgg cttggggagt ataaatatta cgatatgcat caagtaatta aatcggcatt
     2401 aaatttgatg gtgaaattag acaaatgaag gttcaacata gagttgctgt cgttttggta
     2461 acatataacc gtgttgcatt gctagataga gctattcaag cattatatgc gcaaacatat
     2521 ccagtccaca aaattatagt tgtaaataat gcttctactg atgataccgc aaattattta
     2581 gcacaaaaaa atgatattgt tcacttacaa ctccctaaaa atacaggtgg ggctggtggt
     2641 tttcatgaag gtataaaaaa agcttgtagc atggatgttg attttatttg ggtcatggat
     2701 gatgatgcag ttccaaataa taatacacta aaaggtctaa ttaacgatgc acaatttctc
     2761 gaagcatctg gtgataattg gggattttta tgcagtaagg ttgttagcga agataatgaa
     2821 tctattaatg taccatctat atcaaggaaa agaaatcgtt caggttattt atcatggagt
     2881 gaaaaggctg aaaatggtct aataggtgtt gatacagcaa catttgtatc agtacttatg
     2941 aaaaaagaaa tacctttttt ggttgggctt cccgttaaag agatgtttat ttggggagat
     3001 gatactgaat acacttggcg aattagcacg ctaaaaaatt gttatttatc tgcaaatagt
     3061 actatttatc ataagagaat cagtgctagt gctttaagtg ttgtaacgga aaaagaaaaa
     3121 acaagactta attggtatat ttattattac aggaataatt tatatgcgcg gagaaaacat
     3181 ggttctaaaa aagatatgct tgtttttctt atttcaacta ttaaagacat aaaggatatt
     3241 ttttgtaaat ccaaaaataa taaattacta cgtctaaagt gtctattgat tggtattttt
     3301 aaaggggtgg tgtttaatcc ccaaattaag aacgtttaaa cacattaaaa gactaaagtt
     3361 atatggtgaa tatattgtga cttctagtaa caaaatactt tcattattat ttggggttat
     3421 cctttttctg cctctatttc ctgttcagct aaatgaaaca caatatcatg tgactttagc
     3481 cattttatta ggtcttatct taatctttct tatgtcgtgg aaacaattag gaggtttcaa
     3541 aggcgggagc ccagtgcttt attattatct ctgcttgcaa ttattattga ttcttagcgg
     3601 cgtaatgggg gttgatacag ttaaagaata ccaagattta attagcttat ttcgtcctat
     3661 ggttttattg tcatcatttt atttacttta ttgtctgaca tctagaaata gagatttctt
     3721 taggacactt attaagccaa tgaaatttat atgtatggct atgtttatat gggctgttta
     3781 cgagagcatt atacaaaata acatatttca aagtattaat tatcttttat acaaaatgga
     3841 gcgtaaagat gacattaaaa atgttgcagt aacattcttt tttttacctt attattctgc
     3901 ttttgtatct gtctttatga tagcattttt ttatacatgt cgttacactt ataaaataag
     3961 aggaacatat aaatatatat tcatgtcttt ttgttgtatt gttcttacac aatcgaaaac
     4021 gggcgttgta gctgcgttgt ttttaattgt ttgttgcgag ttaatatcaa gttcacaatt
     4081 aaaaagaata ttttgtgcag tatgtgtgat tattttgttt tttatatcgt tctattttta
     4141 cgattattta ttaatgtatc taaatttata tttccctgga aattttactc gaactgtttc
     4201 acaacttgcg gatgatccta gtagtgcata tacattatct gcgagaattg agcagtttct
     4261 gagcatctat agtcaaacaa tgcaacatat accatttata ggttatggat tagggcgtga
     4321 tttattaata gagtcgtggc ctgccactat ctatgttcga tatggttgga ttggtatttg
     4381 tgtattttta atattcaatt tttttctaat atttaaaggg atttgttatt atttaacgca
     4441 tacaagcaat gcattccgat accaagcgat tattttttca ttttggataa ttagtacgct
     4501 tatttcacaa atgagtgcaa tgatgactga aaatacaaag acatcattca tatacttggt
     4561 atatttggtc atatttatgc aactaatcta caagcctcat aaaattaata aatcaaatct
     4621 aggttaatca aatgggtaaa catgcattac tagtaaatgg gtcatacctt cctactgtcg
     4681 gtggagtaga gaattcaatt cgcagtatag ctgcagagct tgttaaagaa ggttggaaag
     4741 tcgatgtggt atgtagtgat gaaggtaagt acagtcgtta tgaaaaaaca aataatgtaa
     4801 gtgtgtatcg ttatgccaaa aaaggttttt ttcggtcatt atataattcg tttagagtat
     4861 taaaagaaat aaacaaaaag tatcaactaa ttatctgtag gaatcacact ctttttataa
     4921 ttcttaaatt tgctggatat aaaaacattc attacataat tcctggtgtt taccattatc
     4981 aaaataaaag tgaaatgaga ggctcattat caaaaaaaat aaaatatgct agcaatgttt
     5041 ttatccagat tgtttctttt aatatttcca aattaaagta tgtattctca cccacaatgg
     5101 aagatcaagt ctgttcagta tctatattta ataagtctgt taaaaaaata tatccaggtg
     5161 tggacgatac aagattctat aagctagata gaaaattaaa aaataattta aagttagctt
     5221 tgggatttga taatgattca aaaattgttc ttggtctcgg gagatttgta gatgtcaaaa
     5281 atttcgagca tcttattaaa tgtgttcctt atttgccctg tgactataaa gtagtcttgg
     5341 ttggtggggg ggataattta acggtttata aaaaaataat aggtgagctt aatttaatta
     5401 accgtgtttt tattttcgaa agtacattag agccagaaaa attttatcaa atagctgatg
     5461 ttttttgcct gacatcaaca tatgagccct ttggccaagt tttgatagaa gcttcatttt
     5521 gtggtttgaa cattgtcgca ttcgattctg atctgaaagg tattgatact gctactaatt
     5581 caatttttat tcatgattgt gatttttatt ttccagtatc ttcctttgag ctatctgatt
     5641 ttgctaaaga aattattaat gcagttacat gcaatagaag ctcattatgc caaaatgagt
     5701 ttaataaaca gtataactgg aaagctttaa ttgagagatt atcaattgat taaatattca
     5761 tttatcatac catgttacaa tgtatccaaa tacatcttaa ctgctattga gagtattcct
     5821 gtcagggaag atatagaaat tatcattatt gatgatggtt caacggatgg tttaagtaat
     5881 gctatttctt catataaagg aaacaaaata atatattttc acaaagaaaa tggtggtgtt
     5941 tcatctgcaa gaaataaggg aattgattta gccacaggta aatatttgct tttttttgat
     6001 ggagatgatt actattccga ggatctcatt aattatcttg ataaaggttt tgataatgct
     6061 caagcaaaaa tgattacttt tggttataat cacctcgcga acaatagatt aaaatattat
     6121 tcacccggaa cgactggcat aaaaagcacg agagttttaa tagggcaaat attattacga
     6181 caatcatttc agtgtatgtg ttcattcgcg gtggaaagtg atcttgttaa aaataataat
     6241 cttagattta atgaagtaac ttattattat gaagatattg aatttcagct taaagtaatg
     6301 tcattgatga atttcgtttt tgtaattgat aaaccattat ttttttatat atcgagaaag
     6361 aagtcagcaa ccaattcatt tgttaatgaa aaacattttt ctctttttta tgctatagat
     6421 agacttgaaa atattcttga tgcagatatg aatgattcct taacatattt tagatggtat
     6481 tctctttttt ggatttttag attggcattt aaaaatggat gtacaaagaa aagtattagt
     6541 attctgaaaa cattgaaatt aagcaatttt aaatacaatc ttattccttg ctctcaagga
     6601 cgatttaaat ataatctcat ttggcttttt cagaagttcc ctatcccttt tatttctttt
     6661 attctaaatt tacgcgcaat tagaaataat taacgagcat attcataatg aacactcaca
     6721 aagtaattgc tgtgatagtg acatatcgaa gaaaaaactt tcttgataaa gtactatcag
     6781 cccttctcaa tcaaacagtt cctttacata aagtaattgt tgttgataat aatagttcag
     6841 atggtacgga cgatattgtt aaggaatata ttaataaatt tcctgaagtt attagttacc
     6901 ataacaccca tgaaaattta ggaggggcag gaggttttta tacaggaatg aagcttatcg
     6961 aaaaatataa ctataactat gattatgctt ggttaatgga tgatgatctc attccgaaca
     7021 acgattgttt agaaataatg ctaacagcga acatcaaggg tataatacag ccgatgcgct
     7081 ttaatcttga tggctcatgt gctgaattat ctgcgttaaa atatgatctc aactcttttt
     7141 ttagattaaa gcctaaagga gatacagtta aagattttgt agcaagtaat ccattactac
     7201 cggatttcat taatattgag accgttcctt ttgaagggcc attaatacat aaagatattg
     7261 taactgctgt tggtttgcct gatccacgat tttttatttt tggtgatgat acagattatt
     7321 ctatacgaac attaaaattg aattatccta ttatatgtat gccccgagct agagcaagtc
     7381 gcctcttaat aaataatcaa agaaacgatt tgttatcttg gaaaggtttt tttatgttgc
     7441 gcaatctttt tcacctttat tttaaacatg gcaatggcta tattgcaaga acaaagcctt
     7501 atattattgc tgtgtgttac ttactactat gcttggtaaa aggtaatttt aagcaatctt
     7561 taattacagt agatgcttta aaaagcgcac gattgttgat gaataatgat aaatataaac
     7621 ctaaagctta atgaatacgc tttttgtata tgaaaaacat attatatttg taatttaatg
     7681 cagacagtag atttgataaa aaacatgttc agtccgcaac ccgcggtaac cccctgacag
     7741 gagtaaacaa tgtcaaagca acagatcggc gtcgtcggta tggcagtgat g
//