LOCUS       KP710589                6897 bp    DNA     linear   BCT 29-MAR-2016
DEFINITION  Escherichia coli strain F 11621-41 serotype O38:K-:H26 O antigen
            gene cluster, complete sequence.
ACCESSION   KP710589
VERSION     KP710589.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 6897)
  AUTHORS   DebRoy,C., Fratamico,P.M., Yan,X., Baranzoni,G., Liu,Y.,
            Needleman,D.S., Tebbs,R., O'Connell,C.D., Allred,A., Swimley,M.,
            Mwangi,M., Kapur,V., Raygoza Garay,J.A., Roberts,E.L. and Katani,R.
  TITLE     Comparison of O-Antigen Gene Clusters of All O-Serogroups of
            Escherichia coli and Proposal for Adopting a New Nomenclature for
            O-Typing
  JOURNAL   PLoS ONE 11 (1), E0147434 (2016)
   PUBMED   26824864
  REMARK    Publication Status: Online-Only
REFERENCE   2  (bases 1 to 6897)
  AUTHORS   Baranzoni,G.M., Yan,X., Fratamico,P.M., Debroy,C., Tebbs,R.S.,
            Liu,Y., O'Connell,C.D., Swimley,M. and Matheny,S.
  TITLE     Direct Submission
  JOURNAL   Submitted (22-JAN-2015) Molecular Characterization of Foodborne
            Pathogens, United States Department of Agriculture, 600 E Mermaid
            LANE, WYNDMOOR, PA 19038, USA
COMMENT     ##Assembly-Data-START##
            Assembly Method       :: CLC Genomics Workbench v. 7.0
            Coverage              :: >50X
            Sequencing Technology :: IonTorrent
            ##Assembly-Data-END##
FEATURES             Location/Qualifiers
     source          1..6897
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /strain="F 11621-41"
                     /serotype="O38:K-:H26"
                     /db_xref="taxon:562"
     misc_feature    68..6721
                     /note="O antigen gene cluster"
     gene            68..883
                     /gene="wffK"
     CDS             68..883
                     /gene="wffK"
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyl transferase family 2"
                     /protein_id="AJR19361.1"
                     /translation="MKIAIGISTLNNGINQVWEKIKNIPDEFLIIISHQVTDKQKQYG
                     DICKKNNVIIITTYCKGLSKSRNILLATAFQKSVDYMIISDDDVAYLVNGLYELKDRI
                     IEDRGKYHYQIQSCTQDGRLRKKYPLMRKRLNRLSAFNISSIEMCLNVNQIQECNVWF
                     DECFGLGARYKAGEEPIFVTDLMKTKNNIIFIPVTITVHPIESSGKKIYNETDALSDR
                     SAIFVRCGGRYLGLLYIFIFWVKKFLFKKKCGRIKKIAALFILLKGYSRYEYL"
     gene            870..2096
                     /gene="wzy"
     CDS             870..2096
                     /gene="wzy"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen polymerase"
                     /protein_id="AJR19362.1"
                     /translation="MNIYKNSNENSLSNNNCIVYFIPLLLLATFFFPLVVLIFIGALS
                     PLLHPVLRNFYFYALLVFIIIFFSTLKPFGDIAEYLHVYHELNYNLIDVFGYSRFGDG
                     LEFMFLAIMKFIGYISGGNDEVFLLSTYFLIVFFLSKILKDVDKKYKLFLLSLFFFNL
                     GFIEVTSYFLRQVLSVVVFLYAINERSFKKYIFFLLSVFFHMSAVVNVFIYAVYMIFG
                     SKKYPYAKIVFSILIGLLVVFFVVYNTPIYSVLLSKFTSVSGNDKFTRLPLNYIIITV
                     VNICFIIMIGKRSKCDDFNKILFCKECFLFLILLPFPALSNRLGMIIFGFYPYFILPY
                     LKAFERKGKSKYALLICLYVVNLVPFLYLMYNVSLGNNMFTFLNNHPFTEGVYGMIDY
                     ILEAIDKGVNYINEGN"
     gene            2151..3320
                     /gene="wzx"
     CDS             2151..3320
                     /gene="wzx"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen flippase"
                     /protein_id="AJR19363.1"
                     /translation="MLVFQLSLFASIKRIYGLDILGSWATIMNISQILLSLFLFGIDI
                     VVVKRIVENPSSTGTEIGCALFLQFLGLLLYASAFITIVIYFYYDIPFAFIFVAILIV
                     ANFFSLYAKVIFFHYSALVESKYRAITILSSVAVSYGYLWCCIYFGWHVFYAYVFFYL
                     IQALFSFTIYKFYFPYSAKWTIDLELVKMYFFMGSKLIVSTISVSLFTQCDVILLESL
                     TGTKEAGAFSAALRLSAIWFMCGGLIANAFFPKIVQLEKIGEEESFIFLKWICGVVSV
                     ISIYGAIIMIALSPIIIKILYGDNMDLSAQVLMVHMWSGVFVFLGSFSSKWLFSKNYI
                     NLEVIKTIIAAILNITLNIIVIPKYGAVGAASVSLLSYFIANFFIFIFIPKTKNV"
     gene            3390..4256
                     /gene="wffL"
     CDS             3390..4256
                     /gene="wffL"
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyl transferase family 2"
                     /protein_id="AJR19364.1"
                     /translation="MSISVITPVFNRALLVYELYESLMSQQSYDFEWVIIDDGSTDNL
                     KEVIDKIASTSPFKIIYRYKKNGGKHTALNIGIEMSSFNWIFIVDSDDILTPNAIALA
                     NEKIQAIVDDKCKGMVFLRGYKTTKEIVGKAETIENISLEKFAGTKGDKALIIKRDSL
                     LKNQFPVFDGENFITEALVWNSILENGYFKYFNEIIYYSEYLPGGLTSNYTDLLRKNI
                     NGTMAFVINNLNLKGLGINVIKQTVFHFIPIFNISNLIVVKKKTKFTVFVLFITCLFL
                     VKIKNKVKGKSL"
     CDS             4253..4939
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyltransferase sugar-binding region
                     containing DXD motif protein"
                     /protein_id="AJR19365.1"
                     /translation="MIPAIIHYIWLGKSEIPKIYLDCMESWKEHAVNYDCYLWNEDSY
                     RKEFGQNDFVEEMIQRKKFAFAADLIRCDVLYRFGGIYLDTDMELVRDISALRKNIAF
                     IGEEDIDTPSCGILGCEPKFWLFQELKAAVIKANGMQTIPFLLKNILDLHGVKKIDSQ
                     DISTIKDITIYSDKYFYPYNPYGSAKRSQLLYRYITKDCYAIHHWAKSWKLSFLERIK
                     RKIIMRYRKE"
     gene            4941..5972
                     /gene="galE"
     CDS             4941..5972
                     /gene="galE"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-glucose 4-epimerase"
                     /protein_id="AJR19366.1"
                     /translation="MNILVTGGAGYIGSHTVLRLLENENEITVVDNLVNSSSEVIKRV
                     ENITQKSICFIEMDILNTELLHEVIINKDIDAVIHFAGLKSVSESISRPLEYYKNNVQ
                     GTISVLSAMINSKAKKIIFSSSATVYGEPEQIPLNEKCKVGGTTNPYGTSKLMAEQIL
                     CDFAKANYGFDIISLRYFNPVGAHPSGMIGEAPNGIPNNLVPYLTKVAIGELDSLKIF
                     GNDYPTRDGYGVRDFIHVMDLADGHIAALNAEFKENSIRIYNLGTGKGYSVLELVDTF
                     ERIIARKINKCVISRRDGDIAECWSDPMLAFNELGWSAKFNLEDMLRDSWNWQIKNPK
                     GYDQNKGRK"
     gene            5975..6721
                     /gene="wfgD"
     CDS             5975..6721
                     /gene="wfgD"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-Glc:alpha-D-GlcNAc-diphosphoundecaprenol
                     beta-1,3-glucosyltransferase"
                     /protein_id="AJR19367.1"
                     /translation="MDYLVSIIMPSYNSEFTIKESIKSVIEQTYSNWELLITDDCSTD
                     RTCQIVKEFVEQDDRIKLFVSDKNKGAGAARNNSIKESSGRFLAFLDSDDLWAPDKLK
                     EQINYMIMNGYALTYTAYSKIDAYGNIKKDIQPPSKVDFSSLLKSNVIGCLTAIYDTE
                     VVGKVYMPLIRKRQDMALWLIILQKIDYAHCLNKNLAFYREGHLSLSSNKIKIIKSQW
                     EFYRYYLGFGYVKAMYYFLHYIQRALRKHA"
ORIGIN      
        1 tctggtagct gtaaagccag gggcggtagc gttatttata ttcataggtc taaaatagat
       61 aaccgagatg aaaatagcta taggtatatc aacattaaat aatggaataa atcaggtttg
      121 ggaaaaaata aaaaatatac cagatgaatt tttaataata atttcacatc aagtgactga
      181 taagcaaaaa caatatgggg atatttgtaa gaaaaacaat gtgataataa taacgacata
      241 ctgtaaaggg ttaagtaaaa gcagaaatat tttattggct actgcatttc aaaaaagcgt
      301 tgattatatg attataagtg atgatgatgt cgcttacctt gttaatggac tttatgaact
      361 aaaagataga ataatcgaag atagaggcaa gtatcattat caaattcaaa gttgtacgca
      421 agacggaaga ctaaggaaaa aatatccgct aatgaggaaa aggctaaata gattgtcagc
      481 atttaacatt tcttcaatag aaatgtgcct taatgtgaat caaatacagg aatgtaatgt
      541 ttggtttgat gagtgtttcg gattgggggc tagatataaa gcgggtgaag aaccgatttt
      601 tgttacggat cttatgaaaa caaaaaataa tattattttt atacctgtca caattactgt
      661 gcatccaata gaaagttctg gtaaaaaaat ttataatgaa acagatgccc tatcagatag
      721 aagtgctatt tttgtacgct gtggtggtag atatttaggg ttactatata tcttcatatt
      781 ctgggtgaaa aaatttcttt ttaaaaaaaa atgcggcaga attaaaaaaa tagctgcatt
      841 atttattctg ctaaaaggat attcccgata tgaatattta taaaaacagc aatgaaaatt
      901 cattgtcaaa taataactgt attgtatatt ttattccttt gttattattg gcaacattct
      961 ttttccctct tgttgtattg atatttattg gtgccctctc gccattgtta cacccagtat
     1021 tgcgtaattt ttatttttat gcactattgg tgtttattat tatttttttt tccacactaa
     1081 aaccatttgg agatattgca gagtatcttc atgtttacca tgagttaaat tataatttaa
     1141 ttgatgtttt tggctattca agatttggtg atgggctgga atttatgttt ttagccatca
     1201 tgaaatttat tgggtatatt tcaggtggca atgatgaagt atttttactt tctacttatt
     1261 ttctgatcgt ctttttttta tccaagattc ttaaagatgt tgataaaaaa tataaacttt
     1321 ttttattatc gcttttcttt tttaacttag gatttataga agttacatca tattttcttc
     1381 gacaggtttt atctgtggtt gtttttctgt atgctataaa tgagcgttca tttaaaaaat
     1441 atatcttttt tttgttgagt gttttttttc acatgtctgc agttgttaat gtttttatat
     1501 atgcagttta tatgatcttt ggttcaaaaa aatacccata tgcaaaaatt gttttttcta
     1561 ttcttattgg acttttagtt gtgttttttg tggtttataa tacgccaatt tattctgtgc
     1621 tattatcgaa attcacttca gtttctggga acgataaatt cacacgtttg ccgttaaatt
     1681 atattatcat tacagttgta aatatatgct ttattataat gatagggaag agaagtaaat
     1741 gtgatgattt taataagatt ttattttgca aggagtgttt tctattctta atattattac
     1801 ctttcccagc actttcgaat agattgggta tgattatttt tggtttttat ccctacttta
     1861 tcttacccta tctaaaagct tttgagagaa aaggtaagag taaatatgcc ctacttattt
     1921 gtttatatgt agtaaattta gtgccttttt tgtatttaat gtataatgtg tctttaggga
     1981 ataacatgtt tactttccta aataatcatc catttactga gggtgtgtat ggtatgattg
     2041 attacatatt ggaggcaatt gataaaggtg ttaactatat caatgaaggt aactagcacg
     2101 tgaaaaaata cttaagtaac ctattttggt tgttgagtga ccgagtgttc atgctggtat
     2161 ttcagttgtc actttttgct tctatcaaaa gaatctatgg gttagatatc ttgggtagtt
     2221 gggcaacgat aatgaatatc tcacaaatat tactatcgtt atttttattt ggcattgata
     2281 tagtcgtagt taaaagaatt gttgaaaatc catcatcaac gggcactgag attggatgtg
     2341 cattattctt acagttttta ggactgttat tatatgcatc tgcttttatt accattgtta
     2401 tttattttta ctatgatata ccgttcgctt ttatattcgt ggcgatatta atcgttgcta
     2461 atttttttag cttgtatgct aaagtaatat tttttcatta ttcggcattg gttgaatcaa
     2521 aatatcgtgc tatcacaatt ctaagtagtg tggccgtttc atatggttat ctttggtgtt
     2581 gtatatattt tggttggcat gtcttttatg cttatgtttt cttttattta attcaagctc
     2641 tttttagttt tactatatac aaattctatt tcccttattc agcaaaatgg acgattgatt
     2701 tagaattagt caaaatgtat ttttttatgg ggagtaaact cattgtatca actattagtg
     2761 tatcactatt tacacagtgt gatgtgatct tattagagtc tcttacaggt acaaaagagg
     2821 ctggggcatt tagtgcagct cttaggttat cagccatctg gtttatgtgt ggaggtttga
     2881 tagcgaacgc tttttttcca aaaattgttc agcttgaaaa aataggagag gaagaatcat
     2941 ttatcttttt gaaatggata tgcggagttg taagtgtaat ctctatatat ggtgcaatta
     3001 ttatgattgc tctgtcacct ataattataa aaatactata tggcgataat atggatttat
     3061 cggcgcaagt attaatggtt catatgtgga gtggtgtttt tgttttttta ggatcatttt
     3121 catctaaatg gttatttagt aagaattata ttaatttaga agtgataaaa accattattg
     3181 ctgcaatttt gaatataact ttgaatatca ttgttatacc aaaatatgga gcggttggcg
     3241 ctgcttctgt atcattgctt tcttatttta ttgctaactt tttcattttt atatttatac
     3301 caaaaactaa aaatgtttaa aatgcaatta cagagtttga agtatattat ttttccatgg
     3361 cgtttgatta atgattttgg aagggttaga tgtcaatttc agtaattact cctgtattca
     3421 atcgagcttt gttagtctat gaattatatg aatccttaat gtcacaacaa tcttatgatt
     3481 ttgaatgggt gataattgat gatggttcta ctgataattt aaaagaggtt attgacaaaa
     3541 tagcgtcgac atccccattt aaaatcatat atagatataa gaagaacggt ggaaaacaca
     3601 cagccttaaa tatagggata gaaatgtcat cgtttaactg gatatttatt gttgatagtg
     3661 atgatatatt aacgcccaac gctattgctc tagccaatga aaaaatccag gcaattgttg
     3721 atgataaatg taaagggatg gttttcctga gggggtataa aactacaaaa gagattgtag
     3781 gtaaagcgga aacaattgaa aatatttctc tggaaaagtt tgcaggtaca aagggagata
     3841 aagcattaat cataaaacgt gattctttac ttaaaaatca atttcctgtt tttgatggtg
     3901 aaaattttat aactgaggca ttggtttgga attcaatttt agaaaatggc tactttaaat
     3961 atttcaatga gatcatctat tatagtgaat atttaccagg aggtttaact tctaattata
     4021 ctgatctttt gcggaaaaat atcaatggca cgatggcttt tgttatcaat aacttgaatc
     4081 taaaaggtct tggtattaat gtcattaaac aaaccgtttt tcattttatt cccattttta
     4141 atattagtaa tctaatagtt gtaaagaaaa agacaaagtt tactgttttc gttttattta
     4201 ttacatgtct ttttcttgta aaaatcaaga ataaagttaa aggaaaatcg ttatgatacc
     4261 ggcaataatt cattatattt ggcttggtaa aagtgaaatc cctaaaatat atttagattg
     4321 tatggagtcg tggaaagaac atgctgtaaa ttacgattgc tatttatgga atgaggattc
     4381 ttatagaaag gaatttggtc agaatgattt tgttgaagaa atgatccaaa ggaaaaagtt
     4441 tgcatttgct gcagatttga tccgatgtga tgtattatat cgttttggtg gtatatatct
     4501 cgatactgat atggaattag ttcgggatat ttctgcattg cgaaaaaata ttgcatttat
     4561 cggcgaagaa gatattgata cgcctagttg tggtattttg ggttgtgaac ccaaattttg
     4621 gcttttccaa gagctaaaag cagctgtcat aaaagcaaat ggtatgcaaa caattccttt
     4681 tcttttaaag aatattttgg acttacatgg tgtaaaaaaa atagattcac aagatatttc
     4741 tactattaaa gatatcacaa tatactctga taagtatttt tatccatata acccttatgg
     4801 tagtgccaaa cgatcacaat tactttatag atatataaca aaagattgtt atgctataca
     4861 tcattgggct aaaagttgga agctttcttt tttagagaga attaaaagaa aaattatcat
     4921 gcgatatcgt aaggagtaat atgaatatcc tagtaacagg tggtgctggt tatattggtt
     4981 cgcatacggt acttcgctta ttagaaaatg aaaatgaaat tactgttgtg gataatttgg
     5041 ttaattcttc atctgaggta attaagcgtg tagagaatat cacgcaaaaa agcatttgct
     5101 ttatcgaaat ggatatactt aacacagaat tattacacga agtaattatt aataaagata
     5161 ttgatgcggt cattcatttt gctgggttga aatctgtttc tgagtcaatc agtaggcctc
     5221 ttgaatatta taagaataat gttcagggaa caattagtgt ccttagtgct atgataaata
     5281 gcaaagccaa gaaaattatt tttagttctt ctgcaacggt ttatggtgag cctgaacaaa
     5341 ttccattaaa tgaaaaatgt aaagtaggtg gtacaacaaa tccttatggc acttcgaaat
     5401 taatggcaga acaaatcctt tgtgattttg caaaagcaaa ttatggtttt gatattattt
     5461 cattacgata ctttaatccg gtaggagcac atcccagtgg aatgataggt gaggctccga
     5521 atgggattcc taataatctt gtaccatatc tcactaaggt tgctataggc gaattagatt
     5581 cattgaaaat atttgggaat gattatccta ccagggacgg atatggagtt cgggatttta
     5641 ttcatgttat ggatcttgcc gatggacata ttgctgcttt gaatgcagag tttaaagaaa
     5701 attctatcag gatatataat ttaggtacag gtaaaggata ttcagtatta gagttggttg
     5761 atactttcga gagaattata gctagaaaaa taaataagtg tgttatatca aggcgagatg
     5821 gagatatagc agaatgctgg tctgatccaa tgttagcctt caatgaactc ggttggtcag
     5881 ccaaattcaa tttagaggat atgctacgag attcttggaa ttggcaaata aaaaacccga
     5941 aaggctacga tcaaaataaa gggaggaaat aataatggat tatcttgtgt caattattat
     6001 gcccagttat aattctgaat tcacaattaa ggaaagtata aaatcggtaa tagaacaaac
     6061 atattctaat tgggagttgt taattactga tgactgctca acagatagaa cttgtcagat
     6121 agttaaagaa tttgttgagc aagatgacag gataaaactt tttgtttcag ataaaaataa
     6181 aggcgcaggc gcagcaagaa acaactcaat aaaagagtct agtggacgat ttttggcttt
     6241 tttagatagt gatgatctat gggcacccga taaactaaaa gagcagatta attatatgat
     6301 tatgaatggc tacgccctta cttatactgc atatagtaaa attgatgcat atggtaatat
     6361 taaaaaagat attcagcccc catcaaaagt cgatttttct tctttactga aatccaatgt
     6421 tatcggatgc cttactgcaa tttatgacac ggaagttgtg ggtaaagtat atatgccact
     6481 catacgaaag cgtcaagata tggcattatg gctaataatc ctgcaaaaaa tagattatgc
     6541 ccactgccta aataaaaacc tagcatttta tcgcgaaggt catttaagtt tatcatcgaa
     6601 taagataaaa ataataaaat cgcaatggga gttttaccgt tattatcttg gttttggata
     6661 tgtaaaggca atgtattatt ttctgcatta tatccagcgt gcattaagaa aacacgctta
     6721 agagtttaac tctatttgtt taattttaaa acattgataa taattgagtc tatcattcat
     6781 ttattgcatg tcttatttta tatctgttat ccttattaac cgtcaaatcc cgcggtaacc
     6841 ccctgacagg agtaaacaat gtcaaagcaa cagatcggcg tagtcggtat ggcagtg
//