LOCUS       KJ778808                6671 bp    DNA     linear   BCT 29-MAR-2016
DEFINITION  Escherichia coli strain H 17a serotype O83:H31 O-antigen gene
            cluster, complete sequence.
ACCESSION   KJ778808
VERSION     KJ778808.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 6671)
  AUTHORS   DebRoy,C., Fratamico,P.M., Yan,X., Baranzoni,G., Liu,Y.,
            Needleman,D.S., Tebbs,R., O'Connell,C.D., Allred,A., Swimley,M.,
            Mwangi,M., Kapur,V., Raygoza Garay,J.A., Roberts,E.L. and Katani,R.
  TITLE     Comparison of O-Antigen Gene Clusters of All O-Serogroups of
            Escherichia coli and Proposal for Adopting a New Nomenclature for
            O-Typing
  JOURNAL   PLoS ONE 11 (1), E0147434 (2016)
   PUBMED   26824864
  REMARK    Publication Status: Online-Only
REFERENCE   2  (bases 1 to 6671)
  AUTHORS   Yan,X., Fratamico,P.M., Tebbs,R.S., O'Connell,C.D., Swimley,M.,
            Baranzoni,G.M., Debroy,C. and Liu,Y.
  TITLE     Direct Submission
  JOURNAL   Submitted (30-APR-2014) Molecular Characterization of Foodborne
            Pathogens Research Unit, USDA-ARS, 600 East Mermaid Lane, Wyndmoor,
            PA 19038, USA
COMMENT     ##Assembly-Data-START##
            Assembly Method       :: CLC Genomics Workbench v. 7.0
            Coverage              :: >50X
            Sequencing Technology :: IonTorrent
            ##Assembly-Data-END##
FEATURES             Location/Qualifiers
     source          1..6671
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /strain="H 17a"
                     /serotype="O83:H31"
                     /db_xref="taxon:562"
     misc_feature    1..6671
                     /note="O-antigen gene cluster"
     gene            82..1524
                     /gene="wzx"
     CDS             82..1524
                     /gene="wzx"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen flippase"
                     /protein_id="AIG62837.1"
                     /translation="MSTFNNAKWVGLSQFTKVILQIMALIIFSRLLDPLEYGIMAMAT
                     VISNFALIIRDLGTGAAVIQRKVISNDLLSSIFWLNIIMGLLVMVAIIICAPLLSEFF
                     KESKLCNVLLLLSVTFPLASAAIVHQANLEREFQFAKVCSIEIASSLISFLIGIVCAI
                     KNYGVYSLVALTISQAMISTLGMWFASKWRPQFKVNWSEIQEVYSFSGNLTLFNMVNY
                     FSRNSDGIIIGRFFSSAILGAYSLAYRIMLFPVQSLTLVISRSLYPVISRMQDDVNSI
                     NHVYMRTLAFISTLTLPLMMGLWSIRNEFVMEILGAKWISVVGILSWLAPTGYIQSLV
                     STTGTILMACGRANIIFYLGVAAAVLQVSAFLIGAQFNITTLSMLYFYSNLINSFIAM
                     YFVMKCINGSLMILIKKLVPSIVSTGVMLIYVKIILKIKEIWLWSDLVTLIVSILGGM
                     TIYTFIYIVFFKQSIRENFPSTLSTKIIKS"
     gene            1521..2756
                     /gene="wzy"
     CDS             1521..2756
                     /gene="wzy"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen polymerase"
                     /protein_id="AIG62838.1"
                     /translation="MRNRILTLMQASAIFMFLTKLPMTTVLVVILSLATIFYKLLQNK
                     LYFDKLNNKTIVVIIAMLLYMLCRIMSQYVVGIDLYDVDIDYIVTISLAFILFVSILF
                     HRDAYAILYAIIASSLFTGGWAIFELIFNFNTLSSRFSDPGNIFYMSGNRVPTAFYYN
                     ENDMLYFLVLFLPITFFFFRRRLVSLVYFLMVLTLALYIASKAAVITLLIYVMWYFIF
                     GVKNISDILKRIVLSILFVSIGVLILYLFGKDALDLQIIDKVVYRFSGLVDFINGNGG
                     DSSSLERFSIYLAVASFIYSNISMLFVGFGNFSYYEGIILNHYPLRIADFHNMHFEII
                     TLFGLPFYILLGCVFFYMYKMNKNISFNNTKVFKLMMLTFLTFLAILSSSVLKYPSFY
                     VFIMLLAVLCENKSRVNEY"
     gene            2746..3648
                     /gene="wfgD"
     CDS             2746..3648
                     /gene="wfgD"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-Glc:alpha-D-GlcNAc-diphosphoundecaprenol
                     beta-1,3-glucosyltransferase WfgD"
                     /protein_id="AIG62839.1"
                     /translation="MNIKVSIVIPTFGRPENLLRAISSVMRQTYNNLEIIVVDDNGKG
                     SENQIKTEELLNDEKYKDIIYIVLPNNKGGGIARNYGIEKASGEVITFLDDDDYYLCN
                     KVEVQLRHLIVNNLDISLCDMIIESDGSSRFKHSKRYSNAKGININEFLIDGVAFTPM
                     IMVRKNILLSAGGFLDTPRFQDHTLMLKLFLITDKVGHVESKLFVHCSNMNTRVSNSP
                     KSLKGFLIRHRLENYVMKNQRNIVEQVKFNQCAQVSPFLYKRRGYKYVSFMIWSLNYN
                     LSVKNVLLTFARVLKISCLAIFKD"
     CDS             3624..4343
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyl transferase family 2"
                     /protein_id="AIG62840.1"
                     /translation="MLSNIQRLVRLLRLSLSTIDMGDNYNIIPQKKFTLSGVYRIKNA
                     ESSIELAIRSIIDVMDEVIVVDNESSDGTLDILIKLQKKYPNKIKIFHYNKKLCRAGK
                     NYADCVRSNPSGSLAKYYNYAFSKATSEYVMKCDANYIFTLKGKIDIINALNKNPDVL
                     CYPGVEIFGHHHSIEPFVYLRKLNYKYCDGLLWEFLHYERTAKIKKILNPCFVHIKRI
                     NYNKYIHSEHKGVDGLYGSHD"
     CDS             4327..4872
                     /codon_start=1
                     /transl_table=11
                     /product="hypothetical protein"
                     /protein_id="AIG62841.1"
                     /translation="MVATIKKITKQIVFFINFIFKGKNLVCLSNCREKYNLTREDIDY
                     IANETLHKSHKKINLTLLIREIDLYLLDITINSWAPISKCVNFKNINGVNDNFLKKIT
                     CEYGKNYLGHCEESGFFVPLGVILLPKWYNHFNLNCHVLNGVDIDGRRWQLDLTSGAL
                     NLVDCYNDLDVHFIMMRVNCD"
     gene            4865..5575
                     /gene="wfeD"
     CDS             4865..5575
                     /gene="wfeD"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-Gal:alpha-D-GlcNAc-diphosphoundecaprenol
                     beta-1,4-galactosyltransferase"
                     /protein_id="AIG62842.1"
                     /translation="MIDDIIKKTPNVIDIMGENCSNSRVITFVNPFSYFSVKDNPRVE
                     DIDYIFIDGILLVKLFNFTNKYNLERYSFDYSSLAGVVFNFCEREKLKVGLIGATSDE
                     IRVATKNIKMIHPQLTVEYTHSGYFLDDQERNNIINTLAENCDVIICGMGTPVQEEFA
                     LDLKALSRNKLIFTCGGFFSQTARKPDFYYSWVKKYNLMWLQRIIMYKHVRKRFFFDY
                     PKFVYKYIKQNLRNSSKR"
     CDS             5586..6407
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyltransferase family 2"
                     /protein_id="AIG62843.1"
                     /translation="MLSINLTTTASRLELCSSTLWSLLHQTIIPDEINVWVSKEPYLA
                     DEGINEIPIFITELNSVRNIIKIKFTKNIGPYRKIIPILRESNADDIIVYADDDVIYG
                     CTWLETLYDEFLKNSCSVVVASRVRLMQYNVFGKLKSYLSFPLCTESILLENDFIITG
                     VGGCILSRSIIKDEFITNNEFMSIAPTTDDIWISKIIELSGGFVKCLPVNLKNINEIS
                     HEINALSQDNMVIKYKSNILIRFANKIINKVKCYFCLISSGNDVAIKKVNRYFDN"
ORIGIN      
        1 gacatgtacg cagcgctcag gtagctgtta agcttggggc ggtagcgtgc gtaatattaa
       61 taaaccgtgg agataagata attgtctacg ttcaataatg ctaaatgggt tgggctatca
      121 cagtttacta aagttatctt acagataatg gcactaataa tctttagtag gttattggat
      181 ccgcttgaat atggtattat ggcaatggct acagttatat ctaacttcgc tcttattata
      241 agggatttgg gaacaggtgc agccgttatt caaaggaaag ttatcagtaa tgatttgcta
      301 tcttcaatat tttggctgaa tatcatcatg gggttattag taatggtggc aataattatt
      361 tgtgctccat tattatctga attcttcaaa gaatcaaaat tatgcaatgt gttgttatta
      421 ctttcagtaa cattccctct ggctagtgca gcgattgtac accaggcaaa cctcgaaaga
      481 gaatttcaat ttgcaaaagt ttgttcaatt gaaatagctt cttcattaat atccttctta
      541 attggaatag tctgtgctat caaaaattat ggtgtctata gtttagtcgc attaacaatc
      601 agtcaagcaa tgatttcaac tcttggaatg tggtttgctt caaaatggag accacagttt
      661 aaggttaatt ggtctgaaat tcaggaagtt tattctttta gtggtaatct gactcttttt
      721 aatatggtga attatttttc aagaaatagt gatggaatta ttatcggtag atttttttcg
      781 agtgccattc taggtgccta ttcattagct tacagaataa tgttatttcc tgtacaaagc
      841 ttaaccttag ttatatcaag atcattatat ccagtgataa gcaggatgca agatgatgtc
      901 aatagtatca atcatgttta catgagaacg ttagcgttca tttctacttt aactttacca
      961 ctcatgatgg ggttatggtc gataaggaac gaatttgtta tggaaatttt aggagccaaa
     1021 tggatttcag ttgtgggaat actttcatgg ttggcaccta caggatatat ccaatctctt
     1081 gttagtacaa caggtactat tttaatggcg tgtggtagag ccaatataat attttacctc
     1141 ggtgttgcag ctgctgtttt acaagtttct gcatttctta ttggggcaca atttaatatt
     1201 acaacactct caatgcttta tttctattca aacttaatta atagctttat tgctatgtat
     1261 tttgtaatga aatgtataaa tggaagcttg atgatcttaa ttaaaaaatt agtcccttcg
     1321 attgtctcta caggtgttat gcttatatat gtgaagatta ttttgaaaat taaagagata
     1381 tggttatgga gtgatttagt aacattgatt gtttcaattc tgggcggtat gactatatat
     1441 acttttattt atatagtctt tttcaaacaa tcgattagag aaaattttcc ttcaacatta
     1501 tctactaaaa taataaaatc atgagaaata gaatacttac cttgatgcaa gccagtgcga
     1561 tttttatgtt tcttacaaaa ttgccgatga ctactgtact tgtggttata ttatccttag
     1621 caacaatttt ttataaactt cttcagaata aactatactt tgataagtta aataataaaa
     1681 caatagtggt aattatcgca atgttgctat atatgttatg tcgaattatg tcgcaatacg
     1741 tggtaggtat agatttatat gatgtggaca ttgattatat agtgacaata tcattagcgt
     1801 ttatattgtt tgtatctatt ttatttcatc gagatgcata tgcaatattg tatgcaatta
     1861 tagcatcttc attatttact ggaggatggg caatatttga attaatattt aattttaata
     1921 ctctatcttc acggtttagc gatcctggga atatttttta catgtcaggt aatagagtgc
     1981 caacagcatt ctattataat gaaaatgata tgctttattt tttggttttg ttcttaccaa
     2041 ttaccttttt tttctttagg agacgcttgg tgtcattagt atatttttta atggtgctga
     2101 cgcttgctct atatattgca tctaaagctg cggttataac attacttatt tatgtaatgt
     2161 ggtattttat ttttggtgta aaaaatataa gtgatatatt aaaaaggatc gtgctatcaa
     2221 tactttttgt ctctattggt gtgctgattc tatatttgtt tggaaaagac gcattagacc
     2281 tacaaataat agacaaggtc gtatatagat tttctgggtt ggttgatttt attaatggaa
     2341 atggcggaga cagtagttca ttagagcgat tttcaattta tttggccgtc gcaagtttta
     2401 tatatagtaa tattagtatg ctattcgtag gttttggcaa ttttagctat tatgaaggaa
     2461 taatattgaa tcactacccg ttaagaatag cggactttca taacatgcat tttgaaatta
     2521 tcacattgtt tggtcttcca ttttatatat tgttaggttg tgtgtttttt tatatgtata
     2581 aaatgaataa aaacatttca tttaataata ctaaggtatt taagttgatg atgttaactt
     2641 tccttacctt tctcgctata ttatctagtt ctgtgctcaa atatccatct ttttatgttt
     2701 ttattatgct tttggctgtg ttgtgtgaaa ataagagtcg agtaaatgaa tattaaagta
     2761 tcaatagtta tccctacctt tggtagacct gagaatcttc ttagggcaat cagcagtgta
     2821 atgagacaaa catataataa tttagaaatt atcgttgttg atgataatgg taagggtagc
     2881 gaaaaccaga ttaaaacgga agaattgctt aatgacgaaa agtataaaga tataatatac
     2941 atagttttac ctaataataa aggtggtgga attgctagga attacggtat agaaaaagca
     3001 agcggtgaag ttattacatt tttggatgat gatgattatt acttatgtaa taaagttgaa
     3061 gtccagttac gtcatctaat agttaataac ttagatatat ctttgtgcga tatgattatt
     3121 gaatctgatg gttcatcacg attcaaacat tcaaaacgtt attcaaatgc taaaggaata
     3181 aatataaatg agtttttgat tgatggtgta gcttttacac ctatgattat ggtgagaaaa
     3241 aatatactat taagtgctgg aggattttta gatacaccac gttttcaaga tcatacgctg
     3301 atgctaaagt tatttttgat tacggataag gtcggtcatg ttgaaagtaa attatttgta
     3361 cattgttcca atatgaatac aagagttagt aactctccca aatcgttgaa ggggtttcta
     3421 atacgacaca gattagaaaa ctatgtgatg aagaaccaaa ggaatatagt agagcaagtt
     3481 aaattcaatc aatgcgcaca agtatctcct ttcctttata aaagaagggg ttataaatat
     3541 gtttccttca tgatctggtc tttaaattat aatcttagtg taaaaaatgt attattaaca
     3601 tttgcgaggg ttttaaaaat atcatgctta gcaatattca aagattagta agattactaa
     3661 gattatcgct atcaacaatt gacatgggtg ataattataa tattataccg caaaagaaat
     3721 tcacattatc tggtgtttat aggataaaga atgcagagag ttcaatagag cttgccattc
     3781 gttcaattat agatgtaatg gatgaagtga ttgttgtaga taatgaatct tcagatggta
     3841 cattagatat tttaattaaa ctgcaaaaaa aatatcctaa taaaataaag attttccatt
     3901 ataacaaaaa actgtgtaga gcaggtaaaa attatgctga ttgtgtgaga agtaatccat
     3961 cagggagctt agccaaatat tataactatg ctttttccaa agcaactagc gaatatgtaa
     4021 tgaagtgtga tgccaattat attttcacgt tgaaaggtaa aatagatatt atcaatgcat
     4081 taaataagaa tcctgatgta ttatgctatc ctggtgttga aatatttggg caccatcact
     4141 caattgagcc atttgtatat ttaaggaaat taaattataa atactgtgat ggtttgttat
     4201 gggagttttt acattatgaa cgaactgcga aaataaagaa aatattaaat ccgtgctttg
     4261 tgcatataaa aagaatcaat tacaataagt atatacatag cgaacataaa ggagttgacg
     4321 gtttatatgg tagccacgat taaaaaaata actaagcaaa ttgttttttt cattaacttt
     4381 atatttaaag gtaaaaacct agtatgttta tcaaattgtc gcgaaaagta taatttaact
     4441 agagaagata tagattatat agccaatgaa acattgcata aatctcataa aaagattaat
     4501 ttaaccttac taattagaga aatagatctt taccttttag atataactat aaactcatgg
     4561 gcaccgattt caaaatgtgt taactttaaa aatataaacg gagttaatga taatttctta
     4621 aaaaaaatta cctgtgagta tgggaaaaat tatcttggtc attgtgagga atctggattt
     4681 tttgtgccct taggtgtcat tcttttacct aaatggtata accattttaa tttaaattgc
     4741 catgtgttaa atggagttga tattgatggt agacgatggc aactggacct tacctctgga
     4801 gcgcttaatc ttgttgattg ctataacgat cttgatgtcc attttattat gatgagggtg
     4861 aattgtgatt gatgatataa tcaaaaagac accaaatgta atcgatatta tgggcgaaaa
     4921 ctgctctaat tctcgagtaa ttacatttgt aaacccattt tcatattttt cagtgaagga
     4981 taaccctaga gtcgaggata tagattatat tttcattgac ggtatattgc ttgtcaaact
     5041 gtttaatttt acgaacaaat ataatcttga aagatatagt ttcgattact cctcattggc
     5101 aggtgtagtg tttaattttt gtgaaaggga aaaactaaaa gttggactta ttggtgctac
     5161 cagtgatgaa attagagttg caacaaaaaa tattaaaatg attcatccac aactgactgt
     5221 ggaatatact cactctggat attttttaga tgatcaagaa cgtaataata ttattaatac
     5281 gttagcagaa aactgcgatg tcatcatttg cggtatggga actcccgtac aagaagaatt
     5341 tgcattagac ttgaaagcat tatctcgaaa taagttaatt ttcacttgtg gaggtttttt
     5401 ttcccaaacg gcaagaaaac ctgattttta ttattcgtgg gtcaaaaaat ataacttgat
     5461 gtggcttcaa agaattataa tgtacaaaca tgttcgaaaa cgcttttttt ttgattatcc
     5521 aaaatttgtt tataaatata taaagcaaaa tttacgcaat tcttctaaga ggtgataatt
     5581 tcgaaatgtt atcaattaat ttaacaacta cagcgtcacg tttagaacta tgttcatcaa
     5641 ctttatggtc attattacat caaacaatta tacccgatga aataaatgta tgggtatcga
     5701 aagagcccta cttggctgat gagggcataa atgagatacc tatatttata acagaattga
     5761 attcagttag aaacattatc aaaattaaat ttacaaaaaa tataggtccc tacagaaaaa
     5821 taattccaat acttagagaa agtaacgcag acgacatcat tgtatatgcc gacgatgatg
     5881 tcatttatgg atgcacttgg cttgaaacat tatatgatga gtttttgaaa aactcatgtt
     5941 ctgtagttgt agcctctagg gttaggctta tgcagtataa tgtatttggt aaattgaaaa
     6001 gttatttatc gtttccatta tgcactgaaa gtattttgct tgaaaatgat tttattatta
     6061 ccggggtagg tggctgtatt ttatcccgct cgatcattaa agatgagttt attacaaata
     6121 atgaatttat gtccattgca cctacaactg atgatatatg gattagtaaa atcatagaac
     6181 tttctggggg atttgtaaaa tgcttacctg ttaacttaaa aaacataaat gaaatttcac
     6241 atgaaatcaa tgcattaagc caagacaaca tggtgataaa atataaaagt aatattttaa
     6301 ttcgtttcgc aaataaaatt attaataagg ttaagtgcta tttttgtttg atttcatcag
     6361 gaaatgatgt tgcaataaaa aaagtaaatc gttattttga taattaattt taatggggtg
     6421 ttaattgaat tgatatttca gttatattcc ttcacgacaa ctaaggggtt tatgaattta
     6481 atgctctgta attacttaaa taataagtct cagcgtaacg cttaaaagtt ttaattgaat
     6541 ttaaaatgga tctgttttta agttaagtat tagattatca tttactatcc ataaatcatt
     6601 tcacatgttc acccccctga caggagtaaa taatgtcaaa gcaacagatc ggcgtcgtcg
     6661 gtatggcagt g
//