88// option. This file may not be copied, modified, or distributed
99// except according to those terms.
1010
11- //! An owned, growable string that enforces that its contents are valid UTF-8.
11+ //! A UTF-8 encoded, growable string.
12+ //!
13+ //! This module contains the [`String`] type, the [`ToString`] trait for
14+ //! converting values to strings, and several error types that may result from
15+ //! working with [`String`]s.
16+ //!
17+ //! [`String`]: struct.String.html
18+ //! [`ToString`]: trait.ToString.html
1219
1320#![ stable( feature = "rust1" , since = "1.0.0" ) ]
1421
@@ -29,7 +36,190 @@ use str::{self, FromStr, Utf8Error, Chars};
2936use vec:: Vec ;
3037use boxed:: Box ;
3138
32- /// A growable string stored as a UTF-8 encoded buffer.
39+ /// A UTF-8 encoded, growable string.
40+ ///
41+ /// The `String` type is the most common string type that has ownership over the
42+ /// contents of the string. It has a close relationship with its borrowed
43+ /// counterpart, the primitive [`str`].
44+ ///
45+ /// [`str`]: ../primitive.str.html
46+ ///
47+ /// # Examples
48+ ///
49+ /// You can create a `String` from a literal string with `String::from`:
50+ ///
51+ /// ```
52+ /// let hello = String::from("Hello, world!");
53+ /// ```
54+ ///
55+ /// You can append a [`char`] to a `String` with the [`push()`] method, and
56+ /// append a [`&str`] with the [`push_str()`] method:
57+ ///
58+ /// ```
59+ /// let mut hello = String::from("Hello, ");
60+ ///
61+ /// hello.push('w');
62+ /// hello.push_str("orld!");
63+ /// ```
64+ ///
65+ /// [`push()`]: #method.push
66+ /// [`push_str()`]: #method.push_str
67+ ///
68+ /// If you have a vector of UTF-8 bytes, you can create a `String` from it with
69+ /// the [`from_utf8()`] method:
70+ ///
71+ /// ```
72+ /// // some bytes, in a vector
73+ /// let sparkle_heart = vec![240, 159, 146, 150];
74+ ///
75+ /// // We know these bytes are valid, so we'll use `unwrap()`.
76+ /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
77+ ///
78+ /// assert_eq!("💖", sparkle_heart);
79+ /// ```
80+ ///
81+ /// [`from_utf8()`]: #method.from_utf8
82+ ///
83+ /// # UTF-8
84+ ///
85+ /// `String`s are always valid UTF-8. This has a few implications, the first of
86+ /// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
87+ /// similar, but without the UTF-8 constraint. The second implication is that
88+ /// you cannot index into a `String`:
89+ ///
90+ /// ```ignore
91+ /// let s = "hello";
92+ ///
93+ /// println!("The first letter of s is {}", s[0]); // ERROR!!!
94+ /// ```
95+ ///
96+ /// [`OsString`]: ../ffi/struct.OsString.html
97+ ///
98+ /// Indexing is intended to be a constant-time operation, but UTF-8 encoding
99+ /// does not allow us to do this. Furthermore, it's not clear what sort of
100+ /// thing the index should return: a byte, a codepoint, or a grapheme cluster.
101+ /// The [`as_bytes()`] and [`chars()`] methods give access to the first two,
102+ /// respectively: a byte slice and an iterator over codepoints.
103+ ///
104+ /// [`as_bytes()`]: #method.as_bytes
105+ /// [`chars()`]: #method.chars
106+ ///
107+ /// # Deref
108+ ///
109+ /// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
110+ /// methods. In addition, this means that you can pass a `String` to any
111+ /// function which takes a [`&str`] by using an ampersand (`&`):
112+ ///
113+ /// ```
114+ /// fn takes_str(s: &str) { }
115+ ///
116+ /// let s = String::from("Hello");
117+ ///
118+ /// takes_str(&s);
119+ /// ```
120+ ///
121+ /// [`&str`]: ../primitive.str.html
122+ /// [`Deref`]: ../ops/trait.Deref.html
123+ ///
124+ /// This will create a [`&str`] from the `String` and pass it in. This
125+ /// conversion is very inexpensive, and so generally, functions will accept
126+ /// [`&str`]s as arguments unless they need a `String` for some specific reason.
127+ ///
128+ ///
129+ /// # Representation
130+ ///
131+ /// A `String` is made up of three components: a pointer to some bytes, a
132+ /// length, and a capacity. The pointer points to an internal buffer `String`
133+ /// uses to store its data. The length is the number of bytes currently stored
134+ /// in the buffer, and the capacity is the size of the buffer in bytes. As such,
135+ /// the length will always be less than or equal to the capacity.
136+ ///
137+ /// This buffer is always stored on the heap.
138+ ///
139+ /// You can look at these with the [`as_ptr()`], [`len()`], and [`capacity()`]
140+ /// methods:
141+ ///
142+ /// ```
143+ /// use std::mem;
144+ ///
145+ /// let story = String::from("Once upon a time...");
146+ ///
147+ /// let ptr = story.as_ptr();
148+ /// let len = story.len();
149+ /// let capacity = story.capacity();
150+ ///
151+ /// // story has nineteen bytes
152+ /// assert_eq!(19, len);
153+ ///
154+ /// // Now that we have our parts, we throw the story away.
155+ /// mem::forget(story);
156+ ///
157+ /// // We can re-build a String out of ptr, len, and capacity. This is all
158+ /// // unsafe because we are responsible for making sure the components are
159+ /// // valid:
160+ /// let s = unsafe { String::from_raw_parts(ptr as *mut _, len, capacity) };
161+ ///
162+ /// assert_eq!(String::from("Once upon a time..."), s);
163+ /// ```
164+ ///
165+ /// [`as_ptr()`]: #method.as_ptr
166+ /// [`len()`]: #method.len
167+ /// [`capacity()`]: #method.capacity
168+ ///
169+ /// If a `String` has enough capacity, adding elements to it will not
170+ /// re-allocate. For example, consider this program:
171+ ///
172+ /// ```
173+ /// let mut s = String::new();
174+ ///
175+ /// println!("{}", s.capacity());
176+ ///
177+ /// for _ in 0..5 {
178+ /// s.push_str("hello");
179+ /// println!("{}", s.capacity());
180+ /// }
181+ /// ```
182+ ///
183+ /// This will output the following:
184+ ///
185+ /// ```text
186+ /// 0
187+ /// 5
188+ /// 10
189+ /// 20
190+ /// 20
191+ /// 40
192+ /// ```
193+ ///
194+ /// At first, we have no memory allocated at all, but as we append to the
195+ /// string, it increases its capacity appropriately. If we instead use the
196+ /// [`with_capacity()`] method to allocate the correct capacity initially:
197+ ///
198+ /// ```
199+ /// let mut s = String::with_capacity(25);
200+ ///
201+ /// println!("{}", s.capacity());
202+ ///
203+ /// for _ in 0..5 {
204+ /// s.push_str("hello");
205+ /// println!("{}", s.capacity());
206+ /// }
207+ /// ```
208+ ///
209+ /// [`with_capacity()`]: #method.with_capacity
210+ ///
211+ /// We end up with a different output:
212+ ///
213+ /// ```text
214+ /// 25
215+ /// 25
216+ /// 25
217+ /// 25
218+ /// 25
219+ /// 25
220+ /// ```
221+ ///
222+ /// Here, there's no need to allocate more memory inside the loop.
33223#[ derive( PartialOrd , Eq , Ord ) ]
34224#[ stable( feature = "rust1" , since = "1.0.0" ) ]
35225pub struct String {
@@ -190,7 +380,7 @@ impl String {
190380 /// // some bytes, in a vector
191381 /// let sparkle_heart = vec![240, 159, 146, 150];
192382 ///
193- /// // We know these bytes are valid, so just use `unwrap()`.
383+ /// // We know these bytes are valid, so we'll use `unwrap()`.
194384 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
195385 ///
196386 /// assert_eq!("💖", sparkle_heart);
@@ -252,7 +442,7 @@ impl String {
252442 /// // some bytes, in a vector
253443 /// let sparkle_heart = vec![240, 159, 146, 150];
254444 ///
255- /// // We know these bytes are valid, so just use `unwrap()`.
445+ /// // We know these bytes are valid, so we'll use `unwrap()`.
256446 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
257447 ///
258448 /// assert_eq!("💖", sparkle_heart);
0 commit comments